Esempio n. 1
0
 def test_join_on_fails_with_different_left_index(self):
     """Check that ``merge`` rejects a ``right_on`` that cannot match the left index.

     The left frame is indexed by ``tm.makeCustomIndex(3, 2)`` (presumably a
     two-level index -- confirm against the pandas testing helpers) while
     ``right_on`` names a single column, so joining on the left index is
     expected to raise ``ValueError``.
     """
     left_columns = {
         "a": np.random.choice(["m", "f"], size=3),
         "b": np.random.randn(3),
     }
     left = DataFrame(left_columns, index=tm.makeCustomIndex(3, 2))

     right = DataFrame(
         {
             "a": np.random.choice(["m", "f"], size=10),
             "b": np.random.randn(10),
         }
     )

     # ``match`` is applied as a regular expression, hence the escaped parens.
     expected_error = (
         r'len\(right_on\) must equal the number of levels in the index of "left"'
     )
     with pytest.raises(ValueError, match=expected_error):
         merge(left, right, left_index=True, right_on="b")
Esempio n. 2
0
 def test_join_on_fails_with_different_column_counts(self):
     """Check that ``merge`` rejects ``left_on``/``right_on`` of unequal length.

     Two key columns are given for the left frame but only one for the right
     frame, so the call is expected to raise ``ValueError``.
     """
     left = DataFrame(
         {
             "a": np.random.choice(["m", "f"], size=3),
             "b": np.random.randn(3),
         }
     )

     right_columns = {
         "a": np.random.choice(["m", "f"], size=10),
         "b": np.random.randn(10),
     }
     right = DataFrame(right_columns, index=tm.makeCustomIndex(10, 2))

     # ``match`` is applied as a regular expression, hence the escaped parens.
     expected_error = r"len\(right_on\) must equal len\(left_on\)"
     with pytest.raises(ValueError, match=expected_error):
         merge(left, right, left_on=["a", "b"], right_on="a")
Esempio n. 3
0
def metadata_column_headers(request: FixtureRequest) -> List[str]:
    """Build the list of metadata column headers selected by ``request.param``.

    Args:
        request: Object whose ``param`` attribute gives the requested number
            of headers (presumably supplied by a parametrized pytest fixture
            declared outside this block -- confirm at the decorator).

    Returns:
        list: An empty list when ``param`` is 0, the single header
            ``"M_l0_g0"`` when ``param`` is 1, otherwise the entries of
            ``makeCustomIndex(param, 1, prefix="M")`` as a list.
    """
    count = request.param

    if count == 0:
        return []

    if count == 1:
        # Hard-coded to sidestep a suspected pandas bug in makeCustomIndex
        # when nentries = 1.
        return ["M_l0_g0"]

    return list(makeCustomIndex(count, 1, prefix="M"))
    def f(df):
        return df["close"] / df["open"]

    # it works!
    result = grouped.apply(f)
    tm.assert_index_equal(result.index, df.index)


@pytest.mark.parametrize(
    "name, func",
    [
        ("Int64Index", tm.makeIntIndex),
        ("Index", tm.makeUnicodeIndex),
        ("Float64Index", tm.makeFloatIndex),
        ("MultiIndex", lambda count: tm.makeCustomIndex(count, 2)),
    ],
)
def test_fails_on_no_datetime_index(name, func):
    """Check that grouping by a frequency ``Grouper`` needs a datetime-like index.

    Args:
        name: Index class name expected to appear in the error message.
        func: Factory called with the number of entries to build the index.
    """
    size = 2
    # Build the index before drawing the column values, so any random state
    # the index factory consumes is used in the same order as before.
    non_datetime_index = func(size)
    frame = DataFrame({"a": np.random.randn(size)}, index=non_datetime_index)

    expected_error = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        f"but got an instance of '{name}'"
    )
    with pytest.raises(TypeError, match=expected_error):
        frame.groupby(Grouper(freq="D"))

Esempio n. 5
0
class TestResolvePath:
    """Tests for ``resolve_path`` over small generated data frames.

    Every test builds its frame with ``make_dataframe(5, 4, ...)`` and one of
    the ``data_gen*`` value generators below. Each schema maps ordered string
    keys ("0", "1", ...) to entries of ``headers``.
    """

    # Archive root handed to ``resolve_path`` and used to build expectations.
    archive = ".\\"
    # Seven candidate column labels. The generated frames are built with
    # ``make_dataframe(5, 4, ...)``, presumably 5 rows by 4 columns, so labels
    # at positions 4..6 are presumably absent from the data -- confirm
    # against ``make_dataframe``.
    headers: Index = makeCustomIndex(7, 1, prefix="C")

    # Schemas built only from header positions 0..3.
    single_schema = SortedDict({"0": headers[0]})
    multi_schema = SortedDict({"0": headers[1], "1": headers[3]})
    multi_schema2 = SortedDict(
        {"0": headers[0], "1": headers[2], "2": headers[3]}
    )

    # Schemas that reference header positions 4 and 6; used by the
    # IndexError test.
    single_schema2 = SortedDict({"0": headers[4]})
    multi_schema3 = SortedDict({"0": headers[1], "1": headers[6]})

    @staticmethod
    def data_gen_invalid(row: int, col: int) -> Union[float, str]:
        """Return the cell value at ``(row, col)`` for an invalid data set.

        Invalid data has multiple distinct values within a column (NaNs and
        empty strings excluded). Coordinates outside the predefined table
        fall back to ``make_dataframe_value``.
        """
        invalid_table: List[List[Union[float, str]]] = [
            ["val", "val", "xxx", "val"],
            ["val", "", "val", "val"],
            ["", "val", "val", nan],
            [nan, nan, "val", "xxx"],
            ["xxx", nan, "", nan],
        ]

        # The column bound is only evaluated once the row bound holds.
        if row < len(invalid_table) and col < len(invalid_table[row]):
            return invalid_table[row][col]
        return make_dataframe_value(row, col)

    @staticmethod
    def data_gen(row: int, col: int) -> Union[float, str]:
        """Return the cell value at ``(row, col)`` for a valid data set.

        Valid data has the same value throughout each column (NaNs and empty
        strings excluded). Coordinates outside the predefined table fall back
        to ``make_dataframe_value``.
        """
        valid_table: List[List[Union[float, str]]] = [
            ["val", "val", "val", "val"],
            ["val", "", "val", "val"],
            ["", "val", "val", nan],
            [nan, nan, "val", "val"],
            ["val", nan, "", nan],
        ]

        # The column bound is only evaluated once the row bound holds.
        if row < len(valid_table) and col < len(valid_table[row]):
            return valid_table[row][col]
        return make_dataframe_value(row, col)

    @staticmethod
    def data_gen_normalizable(row: int, col: int) -> Union[float, str]:
        """Return the cell value at ``(row, col)`` for normalizable data.

        The table is valid data whose values are meant to be normalized by
        ``syphon.schema.resolvepath._normalize``; the matching test expects
        ``"Value 1."`` to yield the path component ``"value_1"``. Coordinates
        outside the predefined table fall back to ``make_dataframe_value``.
        """
        normalizable_table: List[List[Union[float, str]]] = [
            ["Value 1.", "Value 1.", "Value 1.", "Value 1."],
            ["Value 1.", nan, "Value 1.", "Value 1."],
            ["Value 1.", nan, "Value 1.", nan],
            [nan, nan, "Value 1.", nan],
            [nan, nan, "Value 1.", nan],
        ]

        # The column bound is only evaluated once the row bound holds.
        if (row < len(normalizable_table)
                and col < len(normalizable_table[row])):
            return normalizable_table[row][col]
        return make_dataframe_value(row, col)

    @pytest.mark.parametrize(
        "schema, expected",
        [
            (single_schema, join(archive, "val")),
            (multi_schema, join(archive, "val", "val")),
            (multi_schema2, join(archive, "val", "val", "val")),
        ],
    )
    def test_resolve_path(self, schema: SortedDict, expected: str):
        """Valid data resolves to one path component per schema entry."""
        frame: DataFrame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen
        )

        assert resolve_path(self.archive, schema, frame) == expected

    @pytest.mark.parametrize(
        "schema, expected",
        [
            (single_schema, join(archive, "value_1")),
            (multi_schema, join(archive, "value_1", "value_1")),
            (multi_schema2, join(archive, "value_1", "value_1", "value_1")),
        ],
    )
    def test_resolve_path_normalized(self, schema: SortedDict, expected: str):
        """Normalizable values appear in the path in normalized form."""
        frame: DataFrame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen_normalizable
        )

        assert resolve_path(self.archive, schema, frame) == expected

    @pytest.mark.parametrize("schema", [single_schema2, multi_schema3])
    def test_resolve_path_indexerror(self, schema: SortedDict):
        """Schemas using header positions 4 or 6 raise ``IndexError``."""
        frame: DataFrame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen
        )

        with pytest.raises(IndexError):
            resolve_path(self.archive, schema, frame)

    @pytest.mark.parametrize(
        "schema", [single_schema, multi_schema, multi_schema2]
    )
    def test_resolve_path_valueerror(self, schema: SortedDict):
        """Columns holding more than one distinct value raise ``ValueError``."""
        frame: DataFrame = make_dataframe(
            5, 4, data_gen_f=TestResolvePath.data_gen_invalid
        )

        with pytest.raises(ValueError):
            resolve_path(self.archive, schema, frame)