예제 #1
0
    def test_converts_variable_type_after_init(self):
        """Convert the 'ints' variable of an already-created entity.

        The column starts out as strings. After every call to
        ``convert_variable_type`` the variable object must be an instance of
        the requested type; for Numeric and Boolean the dtype of the
        dataframe column is checked as well.
        """
        raw_columns = {
            'id': [0, 1, 2],
            'category': ['a', 'b', 'a'],
            'ints': ['1', '2', '1'],
        }
        source_df = pd.DataFrame(raw_columns)
        source_df["category"] = source_df["category"].astype("category")

        entityset = EntitySet(id='test')
        entityset.entity_from_dataframe(
            entity_id='test_entity', index='id', dataframe=source_df)
        entity = entityset['test_entity']
        # Fetched once, before any conversion: the dtype assertions below read
        # this same object, so they rely on it reflecting later conversions.
        entity_df = entityset.get_dataframe('test_entity')

        # Strings -> Numeric: both the variable class and the dtype change.
        entity.convert_variable_type('ints', variable_types.Numeric)
        assert isinstance(entity['ints'], variable_types.Numeric)
        numeric_dtypes = variable_types.PandasTypes._pandas_numerics
        assert entity_df['ints'].dtype.name in numeric_dtypes

        # For these two targets only the variable class is checked.
        for target_type in (variable_types.Categorical,
                            variable_types.Ordinal):
            entity.convert_variable_type('ints', target_type)
            assert isinstance(entity['ints'], target_type)

        # Boolean conversion is told which values map to True and False.
        entity.convert_variable_type(
            'ints', variable_types.Boolean, true_val=1, false_val=2)
        assert isinstance(entity['ints'], variable_types.Boolean)
        assert entity_df['ints'].dtype.name == 'bool'
예제 #2
0
    def test_converts_variable_type_after_init(self):
        """Check ``convert_variable_type`` on an entity that already exists.

        The 'ints' column (initially strings) is converted to Numeric,
        Categorical, Ordinal and Boolean in turn. The variable class is
        asserted after every step; the column dtype is asserted after the
        Numeric and Boolean steps.
        """
        frame = pd.DataFrame({
            'id': [0, 1, 2],
            'category': ['a', 'b', 'a'],
            'ints': ['1', '2', '1'],
        })
        as_category = frame["category"].astype("category")
        frame["category"] = as_category

        entityset = EntitySet(id='test')
        entityset.entity_from_dataframe(dataframe=frame,
                                        entity_id='test_entity',
                                        index='id')
        entity = entityset['test_entity']
        # Obtained before the conversions on purpose; the dtype checks below
        # inspect this object after the entity has been converted.
        stored = entityset.get_dataframe('test_entity')

        def convert_and_check(target_type, **conversion_kwargs):
            # Convert 'ints' and verify the variable now has the target class.
            entity.convert_variable_type('ints', target_type,
                                         **conversion_kwargs)
            assert isinstance(entity['ints'], target_type)

        convert_and_check(variable_types.Numeric)
        assert (stored['ints'].dtype.name
                in variable_types.PandasTypes._pandas_numerics)

        convert_and_check(variable_types.Categorical)

        convert_and_check(variable_types.Ordinal)

        convert_and_check(variable_types.Boolean, true_val=1, false_val=2)
        assert stored['ints'].dtype.name == 'bool'
예제 #3
0
    def test_converts_variable_types_on_init(self):
        """Check variable types passed to ``entity_from_dataframe``.

        'ints' and 'floats' are string columns declared as Numeric, so the
        entity's dataframe must hold them with a numeric dtype. 'category_int'
        is given no explicit type and must come out as Categorical.
        """
        raw_columns = {
            'id': [0, 1, 2],
            'category': ['a', 'b', 'a'],
            'category_int': [1, 2, 3],
            'ints': ['1', '2', '3'],
            'floats': ['1', '2', '3.0'],
        }
        source_df = pd.DataFrame(raw_columns)
        source_df["category_int"] = source_df["category_int"].astype("category")

        requested_types = {
            'id': variable_types.Categorical,
            'ints': variable_types.Numeric,
            'floats': variable_types.Numeric,
        }
        entityset = EntitySet(id='test')
        entityset.entity_from_dataframe(
            entity_id='test_entity',
            index='id',
            variable_types=requested_types,
            dataframe=source_df,
        )

        entity_df = entityset.get_dataframe('test_entity')
        numeric_dtypes = variable_types.PandasTypes._pandas_numerics
        for column in ('ints', 'floats'):
            assert entity_df[column].dtype.name in numeric_dtypes

        # 'category_int' has no entry in the requested types; its variable
        # type is expected to be inferred from the pandas category dtype.
        entity = entityset["test_entity"]
        assert isinstance(entity['category_int'], variable_types.Categorical)
예제 #4
0
    def test_converts_variable_types_on_init(self):
        """Verify that types requested at entity creation are applied.

        The string columns 'ints' and 'floats' are declared Numeric and must
        end up with a numeric dtype in the entity's dataframe. The
        'category_int' column is not declared and must be typed Categorical.
        """
        frame = pd.DataFrame({
            'id': [0, 1, 2],
            'category': ['a', 'b', 'a'],
            'category_int': [1, 2, 3],
            'ints': ['1', '2', '3'],
            'floats': ['1', '2', '3.0'],
        })
        categorical_ints = frame["category_int"].astype("category")
        frame["category_int"] = categorical_ints

        vtypes = dict(id=variable_types.Categorical,
                      ints=variable_types.Numeric,
                      floats=variable_types.Numeric)
        entityset = EntitySet(id='test')
        entityset.entity_from_dataframe(dataframe=frame,
                                        variable_types=vtypes,
                                        entity_id='test_entity',
                                        index='id')

        stored = entityset.get_dataframe('test_entity')
        allowed = variable_types.PandasTypes._pandas_numerics
        ints_dtype_name = stored['ints'].dtype.name
        assert ints_dtype_name in allowed
        floats_dtype_name = stored['floats'].dtype.name
        assert floats_dtype_name in allowed

        # No explicit type was given for 'category_int'; it is expected to be
        # inferred as Categorical from the column's pandas category dtype.
        entity = entityset["test_entity"]
        assert isinstance(entity['category_int'], variable_types.Categorical)