예제 #1
0
def test_passing_strings_to_variable_types_dfs():
    """Check that a type string passed through ``ft.dfs`` selects the mapped class.

    The ``teams`` entity declares its ``name`` column with the string
    ``'text'``; after running dfs with ``features_only=True`` the ``name``
    variable's class must equal the class that ``find_variable_types()``
    maps ``'text'`` to.
    """
    type_lookup = find_variable_types()

    team_frame = pd.DataFrame({
        'id': range(3),
        'name': ['Breakers', 'Spirit', 'Thorns']
    })
    game_frame = pd.DataFrame({
        'id': range(5),
        'home_team_id': [2, 2, 1, 0, 1],
        'away_team_id': [1, 0, 2, 1, 0],
        'home_team_score': [3, 0, 1, 0, 4],
        'away_team_score': [2, 1, 2, 0, 0]
    })

    # Entity tuples are (dataframe, index[, time_index, variable_types]).
    teams_spec = (team_frame, 'id', None, {'name': 'text'})
    games_spec = (game_frame, 'id')
    entities = {'teams': teams_spec, 'games': games_spec}
    relationships = [('teams', 'id', 'games', 'home_team_id')]

    feature_defs = ft.dfs(entities,
                          relationships,
                          target_entity="teams",
                          features_only=True)

    name_variable = feature_defs[0].entity['name']
    assert name_variable.__class__ == type_lookup['text']
예제 #2
0
def test_passing_strings_to_variable_types_from_dataframe():
    """Check type strings passed to ``entity_from_dataframe`` are resolved.

    Builds an empty dataframe with one column per known variable class
    (the column name is ``str(cls)``) plus one column carrying an
    unrecognized type string. Creating the entity must emit a
    ``UserWarning`` for the unrecognized string, and every resulting
    variable's ``type_string`` must match the string that was requested
    (``"unknown"`` for the unrecognized one).
    """
    type_lookup = find_variable_types()

    # Invert the mapping: column name (str of the class) -> type string.
    column_type_strings = {}
    for type_string, variable_class in type_lookup.items():
        column_type_strings[str(variable_class)] = type_string
    column_type_strings['unknown variable'] = 'some unknown type string'

    entityset = EntitySet()
    frame = pd.DataFrame(columns=list(column_type_strings))
    expected_warning = (
        'Variable type {} was unrecognized, Unknown variable type was used instead'
        .format('some unknown type string'))
    index_column = "<class 'featuretools.variable_types.variable.Index'>"
    time_index_column = (
        "<class 'featuretools.variable_types.variable.NumericTimeIndex'>")

    with pytest.warns(UserWarning, match=expected_warning):
        entityset.entity_from_dataframe(
            entity_id="reversed_variable_types",
            dataframe=frame,
            index=index_column,
            time_index=time_index_column,
            variable_types=column_type_strings)

    entity = entityset["reversed_variable_types"]
    # The unrecognized string is expected to have fallen back to "unknown".
    column_type_strings["unknown variable"] = "unknown"
    for variable in entity.variables:
        expected_type_string = column_type_strings[variable.id]
        assert variable.__class__.type_string == expected_type_string
예제 #3
0
    def _create_variables(self, variable_types, index, time_index,
                          secondary_time_index):
        """Create this entity's variables from its dataframe.

        Type strings in ``variable_types`` are resolved to classes through
        ``find_variable_types()``; an unrecognized string falls back to the
        ``'unknown'`` entry and emits a warning. Types not supplied are
        inferred by ``infer_variable_types``, with explicitly supplied types
        taking precedence. ``self.df`` is then replaced by the result of
        ``convert_all_variable_data`` and ``self.variables`` is set with the
        index variable first.

        Args:
            variable_types (dict[str -> type/str/tuple] or None) : Maps variable
                ids to a type (:class:`.Variable`), a type string (str), or a
                ``(type, kwargs)`` tuple whose kwargs are passed to the Variable.
                ``None`` is treated as an empty mapping. The caller's dict is
                not modified.
            index (str): Name of index column
            time_index (str or None): Name of time_index column
            secondary_time_index (dict[str: [str]]): Dictionary of secondary time columns
                that each map to a list of columns that depend on that secondary time
        """
        # `variable_types.copy() or {}` called .copy() before the fallback
        # could apply, so None raised AttributeError; test for None first.
        if variable_types is None:
            variable_types = {}
        else:
            variable_types = variable_types.copy()

        string_to_class_map = find_variable_types()
        # Snapshot the items so entries can be reassigned while looping.
        for vid, vtype in list(variable_types.items()):
            if not isinstance(vtype, str):
                continue
            if vtype in string_to_class_map:
                variable_types[vid] = string_to_class_map[vtype]
            else:
                variable_types[vid] = string_to_class_map['unknown']
                warnings.warn(
                    "Variable type {} was unrecognized, Unknown variable type was used instead"
                    .format(vtype))

        if index not in variable_types:
            variable_types[index] = vtypes.Index

        link_vars = get_linked_vars(self)
        inferred_variable_types = infer_variable_types(self.df, link_vars,
                                                       variable_types,
                                                       time_index,
                                                       secondary_time_index)
        # Explicitly supplied types override inferred ones.
        inferred_variable_types.update(variable_types)

        variables = []
        for v, vtype in inferred_variable_types.items():
            # TODO document how vtype can be tuple
            if isinstance(vtype, tuple):
                # vtype is (ft.Variable, dict_of_kwargs)
                _v = vtype[0](v, self, **vtype[1])
            else:
                _v = vtype(v, self)
            variables.append(_v)
        # convert data once we've inferred
        self.df = convert_all_variable_data(
            df=self.df, variable_types=inferred_variable_types)
        # make sure index is at the beginning
        index_variable = [v for v in variables if v.id == index][0]
        self.variables = [index_variable
                          ] + [v for v in variables if v.id != index]
예제 #4
0
def test_all_variable_descriptions():
    """Round-trip every known variable type through its data description.

    Builds an empty entity with one column per entry returned by
    ``find_variable_types()``, serializes each variable with
    ``to_data_description()``, deserializes it with
    ``deserialize.description_to_variable`` and checks the result compares
    equal to the original variable.
    """
    variable_types = find_variable_types()
    es = EntitySet()
    dataframe = pd.DataFrame(columns=list(variable_types))
    es.entity_from_dataframe(
        'variable_types',
        dataframe,
        index='index',
        time_index='datetime_time_index',
        variable_types=variable_types,
    )
    entity = es['variable_types']
    for variable in entity.variables:
        description = variable.to_data_description()
        _variable = deserialize.description_to_variable(description, entity=entity)
        # Use `==` instead of calling __eq__ directly: a direct call may
        # return NotImplemented, which is truthy and would let the assert
        # pass without any real comparison having happened.
        assert variable == _variable
예제 #5
0
def description_to_variable(description, entity=None):
    '''Deserialize variable from variable description.

    ``description['type']`` is either a type string or a dict holding the
    type string under ``'value'`` together with keyword arguments for the
    variable constructor. The description passed in is not modified, so the
    same description can be deserialized more than once.

    Args:
        description (dict) : Description of :class:`.Variable`.
        entity (Entity) : Instance of :class:`.Entity` to add :class:`.Variable`. If entity is None, :class:`.Variable` will not be instantiated.

    Returns:
        variable (Variable) : The :class:`.Variable` class when entity is None,
            otherwise an instance of it attached to ``entity``.

    Raises:
        KeyError: If ``description['type']`` is a dict without a ``'value'`` key.
    '''
    variable_types = find_variable_types()
    type_description = description['type']
    if isinstance(type_description, str):
        type_string, kwargs = type_description, {}
    else:
        # Pop from a copy: popping 'value' off the caller's dict would make
        # a second call with the same description fail with KeyError.
        kwargs = dict(type_description)
        type_string = kwargs.pop('value')
    variable = variable_types.get(type_string, variable_types.get('None'))  # 'None' will return the Unknown variable type
    if entity is not None:
        variable = variable(description['id'], entity, **kwargs)
        variable.interesting_values = description['properties']['interesting_values']
    return variable
예제 #6
0
def description_to_variable(description, entity=None):
    '''Deserialize variable from variable description.

    ``description['type']`` is either a type string or a dict holding the
    type string under ``'value'`` together with keyword arguments for the
    variable constructor. The description passed in is not modified, so the
    same description can be deserialized more than once.

    Args:
        description (dict) : Description of :class:`.Variable`.
        entity (Entity) : Instance of :class:`.Entity` to add :class:`.Variable`. If entity is None, :class:`.Variable` will not be instantiated.

    Returns:
        variable (Variable or str) : When entity is None, the type string taken
            from the description. Otherwise an instance of the matching
            :class:`.Variable` class (the ``'unknown'`` entry of
            ``find_variable_types()`` if the type string is not registered),
            with ``interesting_values`` read from the description's JSON.

    Raises:
        KeyError: If ``description['type']`` is a dict without a ``'value'`` key.
    '''
    type_description = description['type']
    if isinstance(type_description, str):
        variable, kwargs = type_description, {}
    else:
        # Pop from a copy: popping 'value' off the caller's dict would make
        # a second call with the same description fail with KeyError.
        kwargs = dict(type_description)
        variable = kwargs.pop('value')
    if entity is not None:
        variable_types = find_variable_types()
        variable_class = variable_types.get(variable, variable_types.get('unknown'))
        variable = variable_class(description['id'], entity, **kwargs)
        interesting_values = pd.read_json(description['properties']['interesting_values'], typ='series')
        variable.interesting_values = interesting_values
    return variable