def test_conversion():
    """Check type detection and conversions between the supported containers.

    Builds numpy, pandas and scipy-sparse fixtures from a single random
    matrix, then verifies for every fixture that ``get_type`` reports the
    expected type and that ``convert_to_dataframe``, ``convert_to_array`` and
    ``convert_to_sparsearray`` each return an object of the target type.
    Finally checks that converting the DataFrame and the sparse fixture back
    to an array reproduces the original values.
    """
    np.random.seed(123)  # fixed seed so the fixtures are reproducible

    array1 = np.random.randn(10, 3)
    binary1 = 1 * (array1 > 0)  # integer 0/1 indicator of the positive entries

    all_objects = {
        "a1": (array1, DataTypes.NumpyArray),
        "a2": (binary1, DataTypes.NumpyArray),
        "a3": (array1[:, 1], DataTypes.NumpyArray),  # 1-D input
        "df1": (pd.DataFrame(array1, columns=["A", "B", "C"]), DataTypes.DataFrame),
        "df2": (pd.DataFrame(binary1, columns=["a", "b", "c"]), DataTypes.DataFrame),
        "s1": (sparse.csr_matrix(array1), DataTypes.SparseArray),
        "s2": (sparse.csr_matrix(binary1), DataTypes.SparseArray),
        # Disabled: SparseDataFrame fixtures ("dfs1"/"dfs2", built with
        # pd.SparseDataFrame over the csr matrices) and the matching
        # convert_to_sparsedataframe check are not exercised here.
    }

    # Each converter paired with the type its result must be detected as.
    conversions = (
        (convert_to_dataframe, DataTypes.DataFrame),
        (convert_to_array, DataTypes.NumpyArray),
        (convert_to_sparsearray, DataTypes.SparseArray),
    )

    for name, (obj, expected_type) in all_objects.items():
        # The fixture name goes into every message so a failure pinpoints
        # which input broke.
        assert get_type(obj) == expected_type, "unexpected detected type for %r" % name

        for converter, target_type in conversions:
            converted = converter(obj)
            assert get_type(converted) == target_type, "%s(%r) returned the wrong type" % (
                converter.__name__,
                name,
            )

    # Round trips back to a dense array must preserve the values exactly.
    assert np.array_equal(convert_to_array(all_objects["df1"][0]), array1)
    assert np.array_equal(convert_to_array(all_objects["s1"][0]), array1)
Beispiel #2
0
def test_conversion():
    """Check type detection and conversions, including a categorical frame.

    Builds numpy, pandas and scipy-sparse fixtures from a single random
    matrix. When ``_IS_PD1`` is true, a copy of the float DataFrame whose
    column "A" is cast to ``category`` is added as well. For every fixture
    the test verifies that ``get_type`` reports the expected type, that each
    ``convert_to_*`` helper returns the target type, and that the array
    conversion has an integer or float dtype. Finally it checks that
    converting the DataFrame and the sparse fixture back to an array
    reproduces the original values.
    """
    np.random.seed(123)  # fixed seed so the fixtures are reproducible

    array1 = np.random.randn(10, 3)
    binary1 = 1 * (array1 > 0)  # integer 0/1 indicator of the positive entries

    all_objects = {
        "a1": (array1, DataTypes.NumpyArray),
        "a2": (binary1, DataTypes.NumpyArray),
        "a3": (array1[:, 1], DataTypes.NumpyArray),  # 1-D input
        "df1": (pd.DataFrame(array1, columns=["A", "B", "C"]), DataTypes.DataFrame),
        "df2": (pd.DataFrame(binary1, columns=["a", "b", "c"]), DataTypes.DataFrame),
        "s1": (sparse.csr_matrix(array1), DataTypes.SparseArray),
        "s2": (sparse.csr_matrix(binary1), DataTypes.SparseArray),
        # Disabled: SparseDataFrame fixtures ("dfs1"/"dfs2", built with
        # pd.SparseDataFrame over the csr matrices) and the matching
        # convert_to_sparsedataframe check are not exercised here.
    }

    if _IS_PD1:
        # Extra fixture: same frame as "df1" but with a categorical column.
        df1_cat = all_objects["df1"][0].copy()
        df1_cat["A"] = df1_cat["A"].astype("category")
        all_objects["df1_cat"] = (df1_cat, DataTypes.DataFrame)

    for name, (obj, expected_type) in all_objects.items():
        # The fixture name goes into every message so a failure pinpoints
        # which input broke.
        assert get_type(obj) == expected_type, "unexpected detected type for %r" % name

        as_frame = convert_to_dataframe(obj)
        assert get_type(as_frame) == DataTypes.DataFrame, (
            "convert_to_dataframe(%r) returned the wrong type" % name
        )

        as_array = convert_to_array(obj)
        assert get_type(as_array) == DataTypes.NumpyArray, (
            "convert_to_array(%r) returned the wrong type" % name
        )
        # The array conversion must be numeric: integer ("i") or float ("f").
        assert as_array.dtype.kind in ("i", "f"), "convert_to_array(%r) has dtype %s" % (
            name,
            as_array.dtype,
        )

        as_sparse = convert_to_sparsearray(obj)
        assert get_type(as_sparse) == DataTypes.SparseArray, (
            "convert_to_sparsearray(%r) returned the wrong type" % name
        )

    # Round trips back to a dense array must preserve the values exactly.
    assert np.array_equal(convert_to_array(all_objects["df1"][0]), array1)
    assert np.array_equal(convert_to_array(all_objects["s1"][0]), array1)
def test_generic_hstack_sparse_and_category(with_cat, force_sparse):
    """Check ``generic_hstack`` on a DataFrame stacked with a sparse array.

    Args:
        with_cat: when true, column "a" of the DataFrame is cast to
            ``category`` before stacking.
        force_sparse: when true, ``max_number_of_cells_for_non_sparse`` is
            set to 10 (otherwise 1000010) and the result is expected to be
            detected as a sparse array.

    The stacked result must always have the rows of the DataFrame and the
    columns of both inputs. When the result is not forced sparse and the
    frame has a categorical column, the result must be a DataFrame that still
    carries the ``category`` dtype on "a".
    """
    np.random.seed(123)  # seeded like the other tests so failures reproduce

    df = pd.DataFrame({"a": 10 + np.arange(10), "b": np.random.randn(10)})
    if with_cat:
        df["a"] = df["a"].astype("category")

    # randint's upper bound is exclusive: (0, 2) draws 0/1 values, whereas
    # (0, 1) can only ever yield zeros and would make the sparse block empty.
    xx = convert_to_sparsearray(np.random.randint(0, 2, size=(10, 2)))

    max_cells = 10 + (0 if force_sparse else 1000000)
    concat = generic_hstack((df, xx), max_number_of_cells_for_non_sparse=max_cells)

    assert concat.shape == (df.shape[0], df.shape[1] + xx.shape[1])
    if force_sparse:
        assert get_type(concat) == DataTypes.SparseArray
    elif with_cat:
        # Check the container type first so a wrong return type fails as an
        # assertion rather than an AttributeError on ``.dtypes``.
        assert isinstance(concat, pd.DataFrame)
        assert concat.dtypes["a"] == "category"