def description_to_entityset(description, **kwargs):
    '''Rebuild an :class:`.EntitySet` from its data description.

    Args:
        description (dict) : Description of an :class:`.EntitySet`. Likely
            generated using :meth:`.serialize.entityset_to_description`.
            Note that each entity's ``loading_info['params']`` dict is
            updated in place with ``kwargs``.
        kwargs (keywords): Additional keyword arguments merged into every
            entity's loading parameters before that entity is deserialized.

    Returns:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
    '''
    check_schema_version(description, 'entityset')

    from featuretools.entityset import EntitySet

    # A description that was not read from disk has no 'path', so this is None.
    path = description.get('path')
    entityset = EntitySet(description['id'])

    entities_with_lti = []
    for entity_description in description['entities'].values():
        loading_params = entity_description['loading_info']['params']
        loading_params.update(kwargs)
        # With path=None an empty dataframe is created for the entity.
        description_to_entity(entity_description, entityset, path=path)
        has_last_time_index = entity_description['properties']['last_time_index']
        if has_last_time_index:
            entities_with_lti.append(entity_description['id'])

    for relationship_description in description['relationships']:
        entityset.add_relationship(
            Relationship.from_dictionary(relationship_description, entityset)
        )

    # Only recompute last time indexes when at least one entity asked for it.
    if entities_with_lti:
        entityset.add_last_time_indexes(updated_entities=entities_with_lti)

    return entityset
def description_to_entityset(description, **kwargs):
    '''Rebuild an :class:`.EntitySet` from its data description.

    Args:
        description (dict) : Description of an :class:`.EntitySet`. Likely
            generated using :meth:`.serialize.entityset_to_description`
        kwargs (keywords): Additional keyword arguments forwarded to
            ``read_woodwork_table`` for every dataframe loaded from disk.

    Returns:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
    '''
    check_schema_version(description, 'entityset')

    from featuretools.entityset import EntitySet

    # A description that was not read from disk has no 'path', so this is None.
    path = description.get('path')
    entityset = EntitySet(description['id'])

    for df_description in description['dataframes'].values():
        if path is None:
            # Nothing on disk to read: build the table from the description alone.
            table = empty_dataframe(df_description)
        else:
            table_path = os.path.join(path, 'data', df_description['name'])
            table = read_woodwork_table(table_path, validate=False, **kwargs)
        entityset.add_dataframe(table)

    for relationship_description in description['relationships']:
        entityset.add_relationship(
            relationship=Relationship.from_dictionary(
                relationship_description, entityset
            )
        )

    return entityset
def from_dictionary(cls, arguments, entityset, dependencies, primitive):
    """Build an instance of ``cls`` from its serialized ``arguments``.

    Args:
        arguments (dict): Serialized feature. Reads the keys
            ``"base_features"``, ``"relationship_path"``, ``"use_previous"``,
            ``"where"`` and ``"name"``, plus the optional ``"feature_names"``.
        entityset: Passed to ``Relationship.from_dictionary`` to resolve each
            serialized relationship.
        dependencies: Mapping from feature name to an already-built feature;
            used for the base features and the ``where`` feature.
        primitive: Primitive handed to the constructor unchanged.

    Returns:
        The new feature, with ``_names`` set from
        ``arguments.get("feature_names")``.
    """
    inputs = [dependencies[feature_name] for feature_name in arguments["base_features"]]

    relationships = [
        Relationship.from_dictionary(serialized, entityset)
        for serialized in arguments["relationship_path"]
    ]
    # The parent dataframe is taken from the first relationship in the path.
    parent_name = relationships[0].parent_dataframe.ww.name
    path = RelationshipPath([(False, rel) for rel in relationships])

    # A falsy "use_previous" entry is passed through as-is.
    window = arguments["use_previous"]
    if window:
        window = Timedelta.from_dictionary(window)

    # A falsy "where" entry is passed through as-is.
    where_feature = arguments["where"]
    if where_feature:
        where_feature = dependencies[where_feature]

    feature = cls(
        base_features=inputs,
        parent_dataframe_name=parent_name,
        primitive=primitive,
        relationship_path=path,
        use_previous=window,
        where=where_feature,
        name=arguments["name"],
    )
    feature._names = arguments.get("feature_names")
    return feature
def from_dictionary(cls, arguments, entityset, dependencies, primitives_deserializer):
    '''Build an instance of ``cls`` from its serialized ``arguments``.

    Args:
        arguments (dict): Serialized feature. Reads the keys
            ``'base_features'``, ``'relationship_path'``, ``'primitive'``,
            ``'use_previous'``, ``'where'`` and ``'name'``.
        entityset: Passed to ``Relationship.from_dictionary`` to resolve each
            serialized relationship.
        dependencies: Mapping from feature name to an already-built feature;
            used for the base features and the ``where`` feature.
        primitives_deserializer: Object whose ``deserialize_primitive`` turns
            ``arguments['primitive']`` into the primitive given to ``cls``.

    Returns:
        The newly constructed feature.
    '''
    inputs = [dependencies[feature_name] for feature_name in arguments['base_features']]

    relationships = [
        Relationship.from_dictionary(serialized, entityset)
        for serialized in arguments['relationship_path']
    ]
    # The parent dataframe is taken from the first relationship in the path.
    parent_name = relationships[0].parent_dataframe.ww.name
    path = RelationshipPath([(False, rel) for rel in relationships])

    primitive = primitives_deserializer.deserialize_primitive(arguments['primitive'])

    # A falsy 'use_previous' entry is passed through as-is.
    window = arguments['use_previous']
    if window:
        window = Timedelta.from_dictionary(window)

    # A falsy 'where' entry is passed through as-is.
    where_feature = arguments['where']
    if where_feature:
        where_feature = dependencies[where_feature]

    return cls(
        base_features=inputs,
        parent_dataframe_name=parent_name,
        primitive=primitive,
        relationship_path=path,
        use_previous=window,
        where=where_feature,
        name=arguments['name'],
    )
def from_dictionary(cls, arguments, entityset, dependencies, primitives_deserializer):
    '''Build an instance of ``cls`` from its serialized ``arguments``.

    Args:
        arguments (dict): Serialized feature. Reads the keys
            ``'base_feature'``, ``'relationship'`` and ``'name'``.
        entityset: Passed to ``Relationship.from_dictionary`` to resolve the
            serialized relationship.
        dependencies: Mapping from feature name to an already-built feature;
            used to look up the base feature.
        primitives_deserializer: Not used by this implementation.

    Returns:
        The newly constructed feature.
    '''
    source_feature = dependencies[arguments['base_feature']]
    rel = Relationship.from_dictionary(arguments['relationship'], entityset)
    return cls(
        base_feature=source_feature,
        # The child dataframe name comes from the relationship itself.
        child_dataframe_name=rel.child_dataframe.ww.name,
        relationship=rel,
        name=arguments['name'],
    )
def test_relationship_serialization(es):
    '''A Relationship round-trips through to_dictionary / from_dictionary.'''
    original = Relationship(es, 'sessions', 'id', 'log', 'session_id')
    expected = {
        'parent_dataframe_name': 'sessions',
        'parent_column_name': 'id',
        'child_dataframe_name': 'log',
        'child_column_name': 'session_id',
    }

    # Serialization yields exactly the four naming keys.
    serialized = original.to_dictionary()
    assert serialized == expected

    # Deserializing that dictionary gives back an equal relationship.
    restored = Relationship.from_dictionary(expected, es)
    assert restored == original
def test_relationship_serialization(es):
    '''A Relationship round-trips through to_dictionary / from_dictionary.'''
    parent_variable = es['sessions']['id']
    child_variable = es['log']['session_id']
    original = Relationship(parent_variable, child_variable)
    expected = {
        'parent_entity_id': 'sessions',
        'parent_variable_id': 'id',
        'child_entity_id': 'log',
        'child_variable_id': 'session_id',
    }

    # Serialization yields exactly the four id keys.
    serialized = original.to_dictionary()
    assert serialized == expected

    # Deserializing that dictionary gives back an equal relationship.
    restored = Relationship.from_dictionary(expected, es)
    assert restored == original
def test_relationship_serialization(es):
    """A Relationship round-trips through to_dictionary / from_dictionary."""
    original = Relationship(es, "sessions", "id", "log", "session_id")
    expected = {
        "parent_dataframe_name": "sessions",
        "parent_column_name": "id",
        "child_dataframe_name": "log",
        "child_column_name": "session_id",
    }

    # Serialization yields exactly the four naming keys.
    serialized = original.to_dictionary()
    assert serialized == expected

    # Deserializing that dictionary gives back an equal relationship.
    restored = Relationship.from_dictionary(expected, es)
    assert restored == original
def from_dictionary(cls, arguments, entityset, dependencies, primitive):
    """Build an instance of ``cls`` from its serialized ``arguments``.

    Args:
        arguments (dict): Serialized feature. Reads the keys
            ``"base_feature"``, ``"relationship"`` and ``"name"``.
        entityset: Passed to ``Relationship.from_dictionary`` to resolve the
            serialized relationship.
        dependencies: Mapping from feature name to an already-built feature;
            used to look up the base feature.
        primitive: Not used by this implementation.

    Returns:
        The newly constructed feature.
    """
    source_feature = dependencies[arguments["base_feature"]]
    rel = Relationship.from_dictionary(arguments["relationship"], entityset)
    return cls(
        base_feature=source_feature,
        # The child dataframe name comes from the relationship itself.
        child_dataframe_name=rel.child_dataframe.ww.name,
        relationship=rel,
        name=arguments["name"],
    )
def description_to_entityset(description, **kwargs):
    """Deserialize entityset from data description.

    Args:
        description (dict) : Description of an :class:`.EntitySet`. Likely
            generated using :meth:`.serialize.entityset_to_description`.
            The optional ``"path"`` and ``"format"`` keys control whether and
            how dataframes are read from disk.
        kwargs (keywords): Additional keyword arguments to pass as keywords
            arguments to the underlying deserialization method
            (``read_woodwork_table``). A ``"format"`` stored in the
            description overrides any ``format`` supplied here.

    Returns:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
    """
    check_schema_version(description, "entityset")

    from featuretools.entityset import EntitySet

    # If data description was not read from disk, path is None.
    path = description.get("path")
    # The storage format belongs to the whole description, so look it up once.
    storage_format = description.get("format")
    entityset = EntitySet(description["id"])

    for df in description["dataframes"].values():
        if path is not None:
            data_path = os.path.join(path, "data", df["name"])
            # Use a per-dataframe copy of the keyword arguments: writing into
            # the shared ``kwargs`` would let a ``filename`` chosen for one
            # dataframe leak into the read call of a later dataframe.
            read_kwargs = dict(kwargs)
            if storage_format is not None:
                read_kwargs["format"] = storage_format
                if (
                    storage_format == "parquet"
                    and df["loading_info"]["table_type"] == "pandas"
                ):
                    read_kwargs["filename"] = df["name"] + ".parquet"
            dataframe = read_woodwork_table(data_path, validate=False, **read_kwargs)
        else:
            # Nothing on disk to read: build the table from the description alone.
            dataframe = empty_dataframe(df)

        entityset.add_dataframe(dataframe)

    for relationship in description["relationships"]:
        rel = Relationship.from_dictionary(relationship, entityset)
        entityset.add_relationship(relationship=rel)

    return entityset