def test_convert_all_variable_data():
    """Check the dtypes produced by ``convert_all_variable_data``.

    Builds a small frame with one column per variable type under test,
    converts it, and asserts each column's resulting pandas dtype. Also
    confirms that a bare ``nan`` in the latlong column comes back as a
    two-element tuple of NaNs.
    """
    df = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
        'boolean': [True, False, True],
        'date': ['3/11/2000', '3/12/2000', '3/13/2000'],
        'integers': [1, 2, 1],
        'latlong': [np.nan, (10, 4), (np.nan, 4)],
    })

    variable_types = {
        'id': vtypes.Numeric,
        'category': vtypes.Categorical,
        'ints': vtypes.Numeric,
        'boolean': vtypes.Boolean,
        'date': vtypes.Datetime,
        'integers': vtypes.Numeric,
        'latlong': vtypes.LatLong,
    }

    df = convert_all_variable_data(df, variable_types)

    assert df['id'].dtype.name in vtypes.PandasTypes._pandas_numerics
    assert df['category'].dtype.name == 'object'
    assert df['ints'].dtype.name in vtypes.PandasTypes._pandas_numerics
    assert df['boolean'].dtype.name == 'bool'
    assert df['date'].dtype.name in vtypes.PandasTypes._pandas_datetimes
    assert df['integers'].dtype.name in vtypes.PandasTypes._pandas_numerics

    # confirm `nan` value in latlong is replaced by `(nan, nan)`.
    # NaN never compares equal to itself, so `== (np.nan, np.nan)` would only
    # pass through the tuple identity shortcut (i.e. when the very same
    # `np.nan` object is stored). Check the shape and NaN-ness explicitly.
    first_latlong = df['latlong'][0]
    assert isinstance(first_latlong, tuple)
    assert len(first_latlong) == 2
    assert all(np.isnan(coordinate) for coordinate in first_latlong)
def test_convert_all_variable_data():
    """Verify the pandas dtype of every column after conversion.

    A frame with id, categorical, string-encoded integer, boolean, date-string
    and integer columns is passed through ``convert_all_variable_data`` along
    with the variable type requested for each column; the dtype name of each
    converted column is then checked.
    """
    frame = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
        'boolean': [True, False, True],
        'date': ['3/11/2000', '3/12/2000', '3/13/2000'],
        'integers': [1, 2, 1],
    })
    requested_types = {
        'id': vtypes.Numeric,
        'category': vtypes.Categorical,
        'ints': vtypes.Numeric,
        'boolean': vtypes.Boolean,
        'date': vtypes.Datetime,
        'integers': vtypes.Numeric,
    }

    converted = convert_all_variable_data(frame, requested_types)

    def dtype_name(column):
        # Name of the pandas dtype the converted column ended up with.
        return converted[column].dtype.name

    pandas_types = vtypes.PandasTypes

    assert dtype_name('id') in pandas_types._pandas_numerics
    assert dtype_name('category') == 'object'
    assert dtype_name('ints') in pandas_types._pandas_numerics
    assert dtype_name('boolean') == 'bool'
    assert dtype_name('date') in pandas_types._pandas_datetimes
    assert dtype_name('integers') in pandas_types._pandas_numerics
def _create_variables(self, variable_types, index, time_index, secondary_time_index):
    """Extracts the variables from a dataframe

    Resolves any string type names, infers types for the remaining columns,
    instantiates one variable per column, converts ``self.df`` to match the
    final types, and stores the variables on ``self.variables`` with the
    index variable first.

    Args:
        variable_types (dict[str -> types/str/dict[str -> type]]) : An entity's
            variable_types dict maps string variable ids to types (:class:`.Variable`)
            or type_strings (str) or (type, kwargs) to pass keyword arguments to the Variable.
            ``None`` or an empty dict is treated as "nothing specified". The
            dict passed in is never modified.
        index (str): Name of index column
        time_index (str or None): Name of time_index column
        secondary_time_index (dict[str: [str]]): Dictionary of secondary time columns
            that each map to a list of columns that depend on that secondary time
    """
    # Work on a private copy so the caller's dict is left untouched. The
    # emptiness/None check has to happen *before* calling ``.copy()``;
    # ``variable_types.copy() or {}`` would raise AttributeError on None.
    variable_types = variable_types.copy() if variable_types else {}

    string_to_class_map = find_variable_types()
    # TODO: Remove once Text has been removed from variable types
    string_to_class_map[Text.type_string] = Text

    # Replace type strings with their variable classes. Iterate over a
    # snapshot of the items because entries are reassigned inside the loop.
    for vid, vtype in list(variable_types.items()):
        if not isinstance(vtype, str):
            continue
        if vtype in string_to_class_map:
            variable_types[vid] = string_to_class_map[vtype]
        else:
            # Unrecognized names fall back to the 'unknown' entry, with a warning.
            variable_types[vid] = string_to_class_map['unknown']
            warnings.warn("Variable type {} was unrecognized, Unknown variable type was used instead".format(vtype))

    if index not in variable_types:
        variable_types[index] = vtypes.Index

    link_vars = get_linked_vars(self)
    inferred_variable_types = infer_variable_types(self.df,
                                                   link_vars,
                                                   variable_types,
                                                   time_index,
                                                   secondary_time_index)
    # Explicitly specified types take precedence over inferred ones.
    inferred_variable_types.update(variable_types)

    variables = []
    for var_id, vtype in inferred_variable_types.items():
        # TODO document how vtype can be tuple
        if isinstance(vtype, tuple):
            # vtype is (ft.Variable, dict_of_kwargs)
            variable_class, variable_kwargs = vtype[0], vtype[1]
            variables.append(variable_class(var_id, self, **variable_kwargs))
        else:
            variables.append(vtype(var_id, self))

    # convert data once we've inferred
    self.df = convert_all_variable_data(df=self.df,
                                        variable_types=inferred_variable_types)

    # make sure index is at the beginning
    index_variable = [v for v in variables if v.id == index][0]
    self.variables = [index_variable] + [v for v in variables if v.id != index]
def _create_variables(self, variable_types, index, time_index, secondary_time_index):
    """Extracts the variables from a dataframe

    Infers types for columns without an explicit type, instantiates one
    variable per column, converts ``self.df`` to match the final types, and
    stores the variables on ``self.variables`` with the index variable first.

    Args:
        variable_types (dict[str -> dict[str -> type]]) : An entity's
            variable_types dict maps string variable ids to types (:class:`.Variable`)
            or (type, kwargs) to pass keyword arguments to the Variable.
            ``None`` or an empty dict is treated as "nothing specified". The
            dict passed in is never modified.
        index (str): Name of index column
        time_index (str or None): Name of time_index column
        secondary_time_index (dict[str: [str]]): Dictionary of secondary time columns
            that each map to a list of columns that depend on that secondary time
    """
    # Copy before adding the index entry below: ``variable_types or {}`` would
    # keep a reference to a non-empty caller dict and mutate it in place.
    variable_types = variable_types.copy() if variable_types else {}

    if index not in variable_types:
        variable_types[index] = vtypes.Index

    link_vars = get_linked_vars(self)
    inferred_variable_types = infer_variable_types(self.df,
                                                   link_vars,
                                                   variable_types,
                                                   time_index,
                                                   secondary_time_index)
    # Explicitly specified types take precedence over inferred ones.
    inferred_variable_types.update(variable_types)

    variables = []
    for var_id, vtype in inferred_variable_types.items():
        # TODO document how vtype can be tuple
        if isinstance(vtype, tuple):
            # vtype is (ft.Variable, dict_of_kwargs)
            variable_class, variable_kwargs = vtype[0], vtype[1]
            variables.append(variable_class(var_id, self, **variable_kwargs))
        else:
            variables.append(vtype(var_id, self))

    # convert data once we've inferred
    self.df = convert_all_variable_data(df=self.df,
                                        variable_types=inferred_variable_types)

    # make sure index is at the beginning
    index_variable = [v for v in variables if v.id == index][0]
    self.variables = [index_variable] + [v for v in variables if v.id != index]