def convert_variable_type(self, variable_id, new_type, convert_data=True, **kwargs):
    """Replace a variable on this entity with one of a different type.

    When ``convert_data`` is truthy, the dataframe column is converted first
    via ``convert_variable_data``; afterwards the variable object is swapped
    in place so the ordering of ``self.variables`` is preserved.

    Args:
        variable_id (str) : Id of variable to convert.
        new_type (subclass of `Variable`) : Type of variable to convert to.
            Must provide ``create_from``, which is called with the existing
            variable to build its replacement.
        convert_data (bool) : If True, convert the underlying data in
            ``self.df`` before replacing the variable.
        **kwargs : Forwarded unchanged to ``convert_variable_data`` (only
            used when ``convert_data`` is True).

    Returns:
        None. ``self.df`` (when converting) and ``self.variables`` are
        updated in place.

    Raises:
        Any exception raised by ``convert_variable_data`` propagates to the
        caller; earlier documentation names RuntimeError for data that cannot
        be converted -- NOTE(review): confirm against that helper.

    Examples:
        >>> es["customer"].convert_variable_type("education_level", vtypes.Categorical)
    """
    if convert_data:
        # Data goes first: if the conversion raises, the variable list is
        # left untouched.
        converted_frame = convert_variable_data(df=self.df,
                                                column_id=variable_id,
                                                new_type=new_type,
                                                **kwargs)
        self.df = converted_frame

    # Swap the variable object at its existing position to maintain order.
    current = self._get_variable(variable_id)
    replacement = new_type.create_from(current)
    position = self.variables.index(current)
    self.variables[position] = replacement
def test_convert_variable_data():
    """Check that ``convert_variable_data`` changes a column's dtype.

    Three conversions are run in sequence on one small frame. Each must
    change the dtype name of the target column; the Numeric and Datetime
    conversions must additionally land on a dtype listed in the matching
    ``vtypes.PandasTypes`` collection.
    """
    frame = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
        'boolean': [True, False, True],
        'date': ['3/11/2000', '3/12/2000', '3/13/2000'],
        'integers': [1, 2, 1]
    })

    # Categorical -> Numeric
    dtype_before = frame['ints'].dtype.name
    frame = convert_variable_data(df=frame,
                                  column_id='ints',
                                  new_type=vtypes.Numeric)
    dtype_after = frame['ints'].dtype.name
    assert dtype_before != dtype_after
    assert dtype_after in vtypes.PandasTypes._pandas_numerics

    # Numeric -> Boolean (operates on the column converted just above)
    dtype_before = frame['ints'].dtype.name
    frame = convert_variable_data(df=frame,
                                  column_id='ints',
                                  new_type=vtypes.Boolean,
                                  true_val=1,
                                  false_val=2)
    dtype_after = frame['ints'].dtype.name
    assert dtype_before != dtype_after

    # Categorical -> Datetime
    dtype_before = frame['date'].dtype.name
    frame = convert_variable_data(df=frame,
                                  column_id='date',
                                  new_type=vtypes.Datetime)
    dtype_after = frame['date'].dtype.name
    assert dtype_before != dtype_after
    assert dtype_after in vtypes.PandasTypes._pandas_datetimes