def test_join_on_fails_with_different_left_index(self):
    """merge must reject a single right_on key against a 2-level left index."""
    # Left frame carries a two-level MultiIndex; right frame is flat.
    left = DataFrame(
        {
            "a": np.random.choice(["m", "f"], size=3),
            "b": np.random.randn(3),
        },
        index=tm.makeCustomIndex(3, 2),
    )
    right = DataFrame(
        {
            "a": np.random.choice(["m", "f"], size=10),
            "b": np.random.randn(10),
        }
    )
    # One right_on column cannot match a two-level left index.
    msg = r'len\(right_on\) must equal the number of levels in the index of "left"'
    with pytest.raises(ValueError, match=msg):
        merge(left, right, right_on="b", left_index=True)
def test_join_on_fails_with_different_column_counts(self):
    """merge must reject key lists of unequal length."""
    left = DataFrame(
        {
            "a": np.random.choice(["m", "f"], size=3),
            "b": np.random.randn(3),
        }
    )
    right = DataFrame(
        {
            "a": np.random.choice(["m", "f"], size=10),
            "b": np.random.randn(10),
        },
        index=tm.makeCustomIndex(10, 2),
    )
    # One right_on key versus two left_on keys: lengths differ, so
    # merge should raise before attempting any alignment.
    msg = r"len\(right_on\) must equal len\(left_on\)"
    with pytest.raises(ValueError, match=msg):
        merge(left, right, right_on="a", left_on=["a", "b"])
def metadata_column_headers(request: FixtureRequest) -> List[str]:
    """Make a list of metadata column headers.

    Returns:
        list: A metadata column header list whose length is between 0 and
        `MAX_METADATA_COLS`.
    """
    count = request.param
    if count == 0:
        return []
    # makeCustomIndex cannot build a single-entry index (pandas bug?), so
    # the one-header case is spelled out by hand instead.
    if count == 1:
        return ["M_l0_g0"]
    return list(makeCustomIndex(count, 1, prefix="M"))
def f(df): return df["close"] / df["open"] # it works! result = grouped.apply(f) tm.assert_index_equal(result.index, df.index) @pytest.mark.parametrize( "name, func", [ ("Int64Index", tm.makeIntIndex), ("Index", tm.makeUnicodeIndex), ("Float64Index", tm.makeFloatIndex), ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)), ], ) def test_fails_on_no_datetime_index(name, func): n = 2 index = func(n) df = DataFrame({"a": np.random.randn(n)}, index=index) msg = ( "Only valid with DatetimeIndex, TimedeltaIndex " f"or PeriodIndex, but got an instance of '{name}'" ) with pytest.raises(TypeError, match=msg): df.groupby(Grouper(freq="D"))
class TestResolvePath(object):
    """Tests for resolve_path: building an archive path from schema-selected
    column values of a DataFrame."""

    # Base directory every resolved path is joined onto.
    archive = ".\\"
    # Seven single-level column headers from pandas' test-index helper
    # (presumably "C_l0_g0" .. "C_l0_g6" — TODO confirm).
    headers: Index = makeCustomIndex(7, 1, prefix="C")
    # Schemas map ordinal keys ("0", "1", ...) to the header whose column
    # value becomes the next path component, in sorted-key order.
    multi_schema = SortedDict({"0": headers[1], "1": headers[3]})
    multi_schema2 = SortedDict({
        "0": headers[0],
        "1": headers[2],
        "2": headers[3]
    })
    multi_schema3 = SortedDict({"0": headers[1], "1": headers[6]})
    single_schema = SortedDict({"0": headers[0]})
    single_schema2 = SortedDict({"0": headers[4]})

    @staticmethod
    def data_gen_invalid(row: int, col: int) -> Union[float, str]:
        """Value generator producing a column with conflicting data values."""
        # Invalid data has:
        # 1) multiple data values per column (excluding NaNs and empty strings).
        # fmt: off
        valmap_invalid: List[List[Union[float, str]]] = [
            ["val", "val", "xxx", "val"],
            ["val", "",    "val", "val"],  # noqa: E241
            ["",    "val", "val", nan],    # noqa: E201, E241
            [nan,   nan,   "val", "xxx"],  # noqa: E201, E241
            ["xxx", nan,   "",    nan],    # noqa: E241
        ]
        # fmt: on
        if row < len(valmap_invalid):
            if col < len(valmap_invalid[row]):
                return valmap_invalid[row][col]
        # Use pandas' value generation function if the given (row, column) falls
        # outside our pre-defined invalid value map.
        return make_dataframe_value(row, col)

    @staticmethod
    def data_gen(row: int, col: int) -> Union[float, str]:
        """Value generator producing one consistent value per column."""
        # Valid data has:
        # 1) the same data value for each column (excluding NaNs and empty strings).
        # fmt: off
        valmap: List[List[Union[float, str]]] = [
            ["val", "val", "val", "val"],
            ["val", "",    "val", "val"],  # noqa: E241
            ["",    "val", "val", nan],    # noqa: E201, E241
            [nan,   nan,   "val", "val"],  # noqa: E201, E241
            ["val", nan,   "",    nan],    # noqa: E241
        ]
        # fmt: on
        if row < len(valmap):
            if col < len(valmap[row]):
                return valmap[row][col]
        # Fall back on pandas' value generator.
        return make_dataframe_value(row, col)

    @staticmethod
    def data_gen_normalizable(row: int, col: int) -> Union[float, str]:
        """Value generator whose values require name normalization."""
        # A valid data map containing data values that will be normalized by
        # `syphon.schema.resolvepath._normalize`.
        # fmt: off
        valmap: List[List[Union[float, str]]] = [
            ["Value 1.", "Value 1.", "Value 1.", "Value 1."],
            ["Value 1.", nan,        "Value 1.", "Value 1."],  # noqa: E241
            ["Value 1.", nan,        "Value 1.", nan],         # noqa: E241
            [nan,        nan,        "Value 1.", nan],         # noqa: E201, E241
            [nan,        nan,        "Value 1.", nan],         # noqa: E201, E241
        ]
        # fmt: on
        if row < len(valmap):
            if col < len(valmap[row]):
                return valmap[row][col]
        # Fall back on pandas' value generator.
        return make_dataframe_value(row, col)

    @pytest.mark.parametrize(
        "schema, expected",
        [
            (single_schema, join(archive, "val")),
            (multi_schema, join(archive, "val", "val")),
            (multi_schema2, join(archive, "val", "val", "val")),
        ],
    )
    def test_resolve_path(self, schema: SortedDict, expected: str):
        # One path component per schema entry, taken from consistent columns.
        data: DataFrame = make_dataframe(5, 4, data_gen_f=TestResolvePath.data_gen)

        actual: str = resolve_path(self.archive, schema, data)

        assert actual == expected

    @pytest.mark.parametrize(
        "schema, expected",
        [
            (single_schema, join(archive, "value_1")),
            (multi_schema, join(archive, "value_1", "value_1")),
            (multi_schema2, join(archive, "value_1", "value_1", "value_1")),
        ],
    )
    def test_resolve_path_normalized(self, schema: SortedDict, expected: str):
        # "Value 1." should be normalized to "value_1" in each component.
        data: DataFrame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen_normalizable)

        actual: str = resolve_path(self.archive, schema, data)

        assert actual == expected

    @pytest.mark.parametrize("schema", [single_schema2, multi_schema3])
    def test_resolve_path_indexerror(self, schema: SortedDict):
        # Schemas referencing headers beyond the generated 4 data columns
        # should raise IndexError.
        data: DataFrame = make_dataframe(5, 4, data_gen_f=TestResolvePath.data_gen)

        with pytest.raises(IndexError):
            resolve_path(self.archive, schema, data)

    @pytest.mark.parametrize("schema", [single_schema, multi_schema, multi_schema2])
    def test_resolve_path_valueerror(self, schema: SortedDict):
        # Columns with conflicting values cannot yield a unique path component.
        data: DataFrame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen_invalid)

        with pytest.raises(ValueError):
            resolve_path(self.archive, schema, data)