def _infer_pd_column_type(column, series, rows_to_sample): dtype = None sample_count = 0 # Loop over all rows for this column to infer types for key, value in series.iteritems(): sample_count += 1 # Stop sampling at the row limit if sample_count > rows_to_sample: continue # Infer the specific type for this row current_dtype = python_type_to_feast_value_type(name=column, value=value) # Make sure the type is consistent for column if dtype: if dtype != current_dtype: raise ValueError( f"Type mismatch detected in column {column}. Both " f"the types {current_dtype} and {dtype} " f"have been found." ) else: # Store dtype in field to type map if it isnt already dtype = current_dtype return dtype
def create_feature_view(feature_dtype, feature_is_list, data_source):
    """
    Creates a feature view over ``data_source`` via ``driver_feature_view``.

    The value type handed to ``driver_feature_view`` is inferred from the
    first sample value that ``get_feature_values_for_dtype`` returns for the
    given dtype / list flag.
    """
    sample_values = get_feature_values_for_dtype(feature_dtype, feature_is_list)
    inferred_value_type = python_type_to_feast_value_type(
        feature_dtype,
        value=sample_values[0],
    )
    return driver_feature_view(data_source, value_type=inferred_value_type)
def _infer_online_entity_rows(
    entity_rows: List[Union[GetOnlineFeaturesRequest.EntityRow, Dict[str, Any]]],
) -> List[GetOnlineFeaturesRequest.EntityRow]:
    """
    Converts user supplied entity rows into a list of EntityRow protos.

    Args:
        entity_rows: Either a list of EntityRow protos (deprecated, returned
            as is) or a list of dictionaries mapping an entity name to a
            feast.types.Value or to a Python native value.

    Returns:
        A list of EntityRow protos built from the given rows.

    Raises:
        TypeError: The native values given for one entity resolve to
            different types across rows.
    """
    # Backward compatibility: only the first element decides whether the
    # caller passed ready-made EntityRow protos.
    if entity_rows and isinstance(entity_rows[0], GetOnlineFeaturesRequest.EntityRow):
        warnings.warn(
            "entity_rows parameter will only be accepting Dict format from Feast v0.7 onwards",
            DeprecationWarning,
        )
        return cast(List[Union[GetOnlineFeaturesRequest.EntityRow]], entity_rows)

    # Type inferred for each entity name the first time it is seen
    entity_type_map = {}
    converted_rows = []

    for row in cast(List[Dict[str, Any]], entity_rows):
        row_fields = {}
        for key, raw_value in row.items():
            # feast.types.Value instances are passed through untouched
            if isinstance(raw_value, Value):
                row_fields[key] = raw_value
                continue

            # Native values: infer the type and require it to match earlier rows
            current_dtype = python_type_to_feast_value_type(name=key, value=raw_value)
            entity_type_map.setdefault(key, current_dtype)
            if current_dtype != entity_type_map[key]:
                raise TypeError(
                    f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
                )
            row_fields[key] = _python_value_to_proto_value(current_dtype, raw_value)

        converted_rows.append(GetOnlineFeaturesRequest.EntityRow(fields=row_fields))

    return converted_rows
def infer_features(self):
    """
    Infers the set of features associated to this feature view from the input source.

    An empty DataFrame is built with one typed column per feature of every
    feature view input (under both "<view name>__<feature name>" and
    "<feature name>") and one per schema entry of every other input. The
    DataFrame is passed through the UDF and the columns of the result are
    turned into features. Explicitly specified features are checked against
    the inferred ones instead of being replaced.

    Raises:
        SpecifiedFeaturesNotPresentError: Some specified features are not
            among the features inferred from the UDF output.
        RegistryInferenceFailure: The set of features could not be inferred.
    """
    df = pd.DataFrame()
    for source in self.inputs.values():
        # isinstance instead of an exact type comparison, so that FeatureView
        # subclasses are not mistaken for request data sources.
        if isinstance(source, FeatureView):
            for feature in source.features:
                dtype = feast_value_type_to_pandas_type(feature.dtype)
                df[f"{source.name}__{feature.name}"] = pd.Series(dtype=dtype)
                df[f"{feature.name}"] = pd.Series(dtype=dtype)
        else:
            request_data = cast(RequestDataSource, source)
            for feature_name, feature_type in request_data.schema.items():
                dtype = feast_value_type_to_pandas_type(feature_type)
                df[f"{feature_name}"] = pd.Series(dtype=dtype)

    # Only the column names and dtypes of the UDF output are used below
    output_df: pd.DataFrame = self.udf(df)
    inferred_features = [
        Feature(
            name=column_name,
            dtype=python_type_to_feast_value_type(
                column_name, type_name=str(column_dtype)
            ),
        )
        for column_name, column_dtype in zip(output_df.columns, output_df.dtypes)
    ]

    if self.features:
        # Explicitly specified features must all be produced by the UDF
        missing_features = [
            specified_feature
            for specified_feature in self.features
            if specified_feature not in inferred_features
        ]
        if missing_features:
            raise errors.SpecifiedFeaturesNotPresentError(
                [f.name for f in missing_features], self.name
            )
    else:
        self.features = inferred_features

    if not self.features:
        raise RegistryInferenceFailure(
            "OnDemandFeatureView",
            f"Could not infer Features for the feature view '{self.name}'.",
        )
def infer_features(self):
    """
    Infers the set of features associated to this feature view from the input source.

    The UDF is run on an empty, typed DataFrame describing every source
    column; the columns of its output become the inferred fields. When
    features were already specified they are validated against the inferred
    ones, otherwise the inferred ones are stored on the view.

    Raises:
        SpecifiedFeaturesNotPresentError: Some specified features are not
            among the inferred ones.
        RegistryInferenceFailure: The set of features could not be inferred.
    """
    df = pd.DataFrame()

    # Columns contributed by feature view projections, registered under both
    # the prefixed and the bare feature name.
    for projection in self.source_feature_view_projections.values():
        for feature in projection.features:
            pandas_dtype = feast_value_type_to_pandas_type(
                feature.dtype.to_value_type()
            )
            df[f"{projection.name}__{feature.name}"] = pd.Series(dtype=pandas_dtype)
            df[f"{feature.name}"] = pd.Series(dtype=pandas_dtype)

    # Columns contributed by request sources
    for request_source in self.source_request_sources.values():
        for field in request_source.schema:
            pandas_dtype = feast_value_type_to_pandas_type(
                field.dtype.to_value_type()
            )
            df[f"{field.name}"] = pd.Series(dtype=pandas_dtype)

    output_df: pd.DataFrame = self.udf.__call__(df)

    # One Field per output column, typed from the column's pandas dtype name
    inferred_features = [
        Field(
            name=column_name,
            dtype=from_value_type(
                python_type_to_feast_value_type(
                    column_name, type_name=str(column_dtype)
                )
            ),
        )
        for column_name, column_dtype in zip(output_df.columns, output_df.dtypes)
    ]

    if not self.features:
        self.features = inferred_features
    else:
        missing_features = [
            specified
            for specified in self.features
            if specified not in inferred_features
        ]
        if missing_features:
            raise SpecifiedFeaturesNotPresentError(
                [f.name for f in missing_features], self.name
            )

    if not self.features:
        raise RegistryInferenceFailure(
            "OnDemandFeatureView",
            f"Could not infer Features for the feature view '{self.name}'.",
        )
def _infer_online_entity_rows(
    entity_rows: List[Dict[str, Any]],
) -> List[GetOnlineFeaturesRequest.EntityRow]:
    """
    Converts user supplied entity rows into a list of EntityRow protos.

    Args:
        entity_rows: A list of dictionaries mapping an entity name to a
            feast.types.Value or to a Python native value.

    Returns:
        A list of EntityRow protos, one per input dictionary.

    Raises:
        TypeError: The native values given for one entity resolve to
            different types across rows.
    """
    # Type inferred for each entity name the first time it is seen
    entity_type_map = {}
    converted_rows = []

    for row in cast(List[Dict[str, Any]], entity_rows):
        row_fields = {}
        for key, raw_value in row.items():
            # feast.types.Value instances are passed through untouched
            if isinstance(raw_value, Value):
                row_fields[key] = raw_value
                continue

            # Native values: infer the type and require it to match earlier rows
            current_dtype = python_type_to_feast_value_type(name=key, value=raw_value)
            entity_type_map.setdefault(key, current_dtype)
            if current_dtype != entity_type_map[key]:
                raise TypeError(
                    f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
                )
            row_fields[key] = _python_value_to_proto_value(current_dtype, raw_value)

        converted_rows.append(GetOnlineFeaturesRequest.EntityRow(fields=row_fields))

    return converted_rows