def test_convert_all_variable_data():
    """Check that ``convert_all_variable_data`` coerces every column.

    Builds a small frame whose columns are declared as Numeric,
    Categorical, Boolean, Datetime and LatLong, runs the conversion, and
    asserts on the resulting dtype names. It also checks that a bare
    ``nan`` in the latlong column comes back as a ``(nan, nan)`` tuple.
    """
    df = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
        'boolean': [True, False, True],
        'date': ['3/11/2000', '3/12/2000', '3/13/2000'],
        'integers': [1, 2, 1],
        'latlong': [np.nan, (10, 4), (np.nan, 4)]
    })

    variable_types = {
        'id': vtypes.Numeric,
        'category': vtypes.Categorical,
        'ints': vtypes.Numeric,
        'boolean': vtypes.Boolean,
        'date': vtypes.Datetime,
        'integers': vtypes.Numeric,
        'latlong': vtypes.LatLong
    }

    df = convert_all_variable_data(df, variable_types)

    assert df['id'].dtype.name in vtypes.PandasTypes._pandas_numerics
    assert df['category'].dtype.name == 'object'
    assert df['ints'].dtype.name in vtypes.PandasTypes._pandas_numerics
    assert df['boolean'].dtype.name == 'bool'
    assert df['date'].dtype.name in vtypes.PandasTypes._pandas_datetimes
    assert df['integers'].dtype.name in vtypes.PandasTypes._pandas_numerics

    # confirm `nan` value in latlong is replaced by `(nan, nan)`.
    # NaN never compares equal to itself, so `value == (np.nan, np.nan)`
    # only succeeds when both elements are the very same `np.nan` object
    # (tuple equality short-circuits on identity). Check each coordinate
    # with `np.isnan` so any NaN float is accepted.
    first_latlong = df['latlong'][0]
    assert isinstance(first_latlong, tuple)
    assert len(first_latlong) == 2
    assert all(np.isnan(coord) for coord in first_latlong)
def test_convert_all_variable_data():
    """Verify the dtypes produced by ``convert_all_variable_data``.

    Each column of a small frame is declared with a variable type; after
    conversion the dtype name of every column is checked against the
    expected pandas dtype (or family of dtypes).
    """
    raw_columns = {
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
        'boolean': [True, False, True],
        'date': ['3/11/2000', '3/12/2000', '3/13/2000'],
        'integers': [1, 2, 1]
    }
    declared_types = {
        'id': vtypes.Numeric,
        'category': vtypes.Categorical,
        'ints': vtypes.Numeric,
        'boolean': vtypes.Boolean,
        'date': vtypes.Datetime,
        'integers': vtypes.Numeric
    }

    converted = convert_all_variable_data(pd.DataFrame(raw_columns),
                                          declared_types)

    def dtype_name(column):
        return converted[column].dtype.name

    # Every column declared Numeric must end up with a numeric dtype,
    # including 'ints', which starts out as strings.
    for numeric_column in ('id', 'ints', 'integers'):
        assert dtype_name(numeric_column) in vtypes.PandasTypes._pandas_numerics

    assert dtype_name('category') == 'object'
    assert dtype_name('boolean') == 'bool'
    assert dtype_name('date') in vtypes.PandasTypes._pandas_datetimes
Exemple #3
0
    def _create_variables(self, variable_types, index, time_index, secondary_time_index):
        """Extracts the variables from a dataframe

        Resolves string type names to variable classes, infers the types of
        the remaining columns, instantiates one variable per column, converts
        ``self.df`` to match, and stores the variables on ``self.variables``
        with the index variable first.

        Args:
            variable_types (dict[str -> types/str/dict[str -> type]] or None) : An entity's
                variable_types dict maps string variable ids to types (:class:`.Variable`)
                or type_strings (str) or (type, kwargs) to pass keyword arguments to the Variable.
                The dict is copied, so the caller's object is not modified here;
                ``None`` or an empty dict means "infer everything".
            index (str): Name of index column
            time_index (str or None): Name of time_index column
            secondary_time_index (dict[str: [str]]): Dictionary of secondary time columns
                that each map to a list of columns that depend on that secondary time
        """
        # Copy only when there is something to copy: calling `.copy()` first
        # and falling back with `or {}` would raise on `None` before the
        # fallback could ever apply.
        variable_types = variable_types.copy() if variable_types else {}

        string_to_class_map = find_variable_types()
        # TODO: Remove once Text has been removed from variable types
        string_to_class_map[Text.type_string] = Text

        # Replace type strings with their classes. Iterate over a snapshot of
        # the items because entries are reassigned inside the loop.
        for vid, vtype in list(variable_types.items()):
            if not isinstance(vtype, str):
                continue
            if vtype in string_to_class_map:
                variable_types[vid] = string_to_class_map[vtype]
            else:
                variable_types[vid] = string_to_class_map['unknown']
                warnings.warn("Variable type {} was unrecognized, Unknown variable type was used instead".format(vtype))

        if index not in variable_types:
            variable_types[index] = vtypes.Index

        link_vars = get_linked_vars(self)
        inferred_variable_types = infer_variable_types(self.df,
                                                       link_vars,
                                                       variable_types,
                                                       time_index,
                                                       secondary_time_index)
        # Explicitly supplied types take precedence over inferred ones.
        inferred_variable_types.update(variable_types)

        variables = []
        for v, vtype in inferred_variable_types.items():
            # TODO document how vtype can be tuple
            if isinstance(vtype, tuple):
                # vtype is (ft.Variable, dict_of_kwargs)
                _v = vtype[0](v, self, **vtype[1])
            else:
                _v = vtype(v, self)
            variables.append(_v)

        # convert data once we've inferred
        self.df = convert_all_variable_data(df=self.df,
                                            variable_types=inferred_variable_types)
        # make sure index is at the beginning
        index_variable = [v for v in variables
                          if v.id == index][0]
        self.variables = [index_variable] + [v for v in variables
                                             if v.id != index]
Exemple #4
0
    def _create_variables(self, variable_types, index, time_index, secondary_time_index):
        """Extracts the variables from a dataframe

        Infers the types of columns not listed in ``variable_types``,
        instantiates one variable per column, converts ``self.df`` to match,
        and stores the variables on ``self.variables`` with the index
        variable first.

        Args:
            variable_types (dict[str -> type or (type, dict)] or None) : An entity's
                variable_types dict maps string variable ids to types (:class:`.Variable`)
                or (type, kwargs) to pass keyword arguments to the Variable.
                The dict is copied, so the caller's object is not modified here;
                ``None`` or an empty dict means "infer everything".
            index (str): Name of index column
            time_index (str or None): Name of time_index column
            secondary_time_index (dict[str: [str]]): Dictionary of secondary time columns
                that each map to a list of columns that depend on that secondary time
        """
        # Work on a private copy: the index entry added below must not leak
        # into the dict object the caller passed in.
        variable_types = variable_types.copy() if variable_types else {}
        if index not in variable_types:
            variable_types[index] = vtypes.Index

        link_vars = get_linked_vars(self)
        inferred_variable_types = infer_variable_types(self.df,
                                                       link_vars,
                                                       variable_types,
                                                       time_index,
                                                       secondary_time_index)
        # Explicitly supplied types take precedence over inferred ones.
        inferred_variable_types.update(variable_types)

        variables = []
        for v, vtype in inferred_variable_types.items():
            # TODO document how vtype can be tuple
            if isinstance(vtype, tuple):
                # vtype is (ft.Variable, dict_of_kwargs)
                _v = vtype[0](v, self, **vtype[1])
            else:
                _v = vtype(v, self)
            variables.append(_v)

        # convert data once we've inferred
        self.df = convert_all_variable_data(df=self.df,
                                            variable_types=inferred_variable_types)
        # make sure index is at the beginning
        index_variable = [v for v in variables
                          if v.id == index][0]
        self.variables = [index_variable] + [v for v in variables
                                             if v.id != index]