def parse_schema_training_dataset(self, dataframe):
    return [
        training_dataset_feature.TrainingDatasetFeature(
            feat.name.lower(), feat.dataType.simpleString()
        )
        for feat in dataframe.schema
    ]
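# A minimal standalone sketch of the schema parsing above, assuming a local
# pyspark installation; it reproduces the (lower-cased name, simple type string)
# mapping without depending on the hsfs TrainingDatasetFeature class.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("schema_sketch").getOrCreate()
df = spark.createDataFrame([(1, 0.5, "a")], ["Customer_ID", "avg_spend", "segment"])

parsed = [(field.name.lower(), field.dataType.simpleString()) for field in df.schema]
print(parsed)  # [('customer_id', 'bigint'), ('avg_spend', 'double'), ('segment', 'string')]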
def attach_transformation_fn(
    self, training_dataset_obj=None, feature_view_obj=None
):
    if training_dataset_obj:
        target_obj = training_dataset_obj
    else:
        target_obj = feature_view_obj

    if target_obj._transformation_functions:
        for (
            feature_name,
            transformation_fn,
        ) in target_obj._transformation_functions.items():
            # Transformation functions cannot be attached to label columns.
            if feature_name in target_obj.labels:
                raise ValueError(
                    "Online transformations for training dataset labels are not supported."
                )
            # Register the transformed feature with the output type of its
            # transformation function.
            target_obj._features.append(
                training_dataset_feature.TrainingDatasetFeature(
                    name=feature_name,
                    feature_group_feature_name=feature_name,
                    type=transformation_fn.output_type,
                    label=False,
                    transformation_function=transformation_fn,
                )
            )
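# Standalone sketch of the attachment pattern above, using illustrative stand-in
# types (TransformationFn / Feature are not hsfs classes): transformed features
# are appended with the output type of their transformation function, and labels
# are rejected.
from collections import namedtuple

TransformationFn = namedtuple("TransformationFn", ["output_type"])
Feature = namedtuple("Feature", ["name", "type", "label", "transformation_function"])

transformation_functions = {"avg_spend": TransformationFn(output_type="double")}
labels = ["churn"]
features = []

for feature_name, fn in transformation_functions.items():
    if feature_name in labels:
        raise ValueError("Online transformations for training dataset labels are not supported.")
    features.append(Feature(feature_name, fn.output_type, False, fn))

print(features)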
def save(self, feature_view_obj):
    if feature_view_obj.labels:
        # Use extend() so each label becomes its own TrainingDatasetFeature
        # (append() would nest the whole list as a single element).
        feature_view_obj._features.extend(
            [
                training_dataset_feature.TrainingDatasetFeature(
                    name=label_name, label=True
                )
                for label_name in feature_view_obj.labels
            ]
        )
    self._transformation_function_engine.attach_transformation_fn(feature_view_obj)
    updated_fv = self._feature_view_api.post(feature_view_obj)
    print(
        "Feature view created successfully, explore it at \n"
        + self._get_feature_view_url(updated_fv)
    )
    return updated_fv
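# Plain-list illustration of why the label features above are added with
# extend() rather than append(): extend() flattens them into the feature list,
# while append() would nest the whole list as a single element.
existing = ["f1", "f2"]
label_features = ["label_a", "label_b"]

extended = existing.copy()
extended.extend(label_features)
print(extended)  # ['f1', 'f2', 'label_a', 'label_b']

appended = existing.copy()
appended.append(label_features)
print(appended)  # ['f1', 'f2', ['label_a', 'label_b']]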
def save(self, training_dataset, features, user_write_options):
    if isinstance(features, query.Query):
        training_dataset._querydto = features
        training_dataset._features = [
            training_dataset_feature.TrainingDatasetFeature(
                name=label_name, label=True
            )
            for label_name in training_dataset.label
        ]
        self._transformation_function_engine.attach_transformation_fn(
            training_dataset
        )
    else:
        features = engine.get_instance().convert_to_default_dataframe(features)
        training_dataset._features = (
            engine.get_instance().parse_schema_training_dataset(features)
        )
        for label_name in training_dataset.label:
            for feature in training_dataset._features:
                if feature.name == label_name:
                    feature.label = True

        # check if user provided transformation functions and throw error as
        # transformation functions work only with query objects
        if training_dataset.transformation_functions:
            raise ValueError(
                "Transformation functions can only be applied to training datasets generated from Query object"
            )

    if len(training_dataset.splits) > 0 and training_dataset.train_split is None:
        training_dataset.train_split = "train"
        warnings.warn(
            "Training dataset splits were defined but no `train_split` (the name of the split that is going to be "
            "used for training) was provided. Setting this property to `train`. The statistics of this "
            "split will be used for transformation functions."
        )

    updated_instance = self._training_dataset_api.post(training_dataset)
    td_job = engine.get_instance().write_training_dataset(
        training_dataset, features, user_write_options, self.OVERWRITE
    )
    return updated_instance, td_job
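# Standalone sketch of the train_split defaulting above: when splits exist but
# no train_split is named, it falls back to "train" and emits a warning
# (plain variables stand in for the training_dataset object).
import warnings

splits = {"train": 0.8, "test": 0.2}
train_split = None

if len(splits) > 0 and train_split is None:
    train_split = "train"
    warnings.warn(
        "Training dataset splits were defined but no `train_split` was provided. "
        "Setting this property to `train`."
    )

print(train_split)  # train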
def save(self, training_dataset, features, user_write_options):
    if isinstance(features, query.Query):
        training_dataset._querydto = features
        training_dataset._features = [
            training_dataset_feature.TrainingDatasetFeature(
                name=label_name, label=True
            )
            for label_name in training_dataset.label
        ]
    else:
        features = engine.get_instance().convert_to_default_dataframe(features)
        training_dataset._features = (
            engine.get_instance().parse_schema_training_dataset(features)
        )
        for label_name in training_dataset.label:
            for feature in training_dataset._features:
                if feature.name == label_name:
                    feature.label = True

    self._training_dataset_api.post(training_dataset)
    engine.get_instance().write_training_dataset(
        training_dataset, features, user_write_options, self.OVERWRITE
    )
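# Standalone sketch of the label flagging step shared by both save() variants:
# features parsed from a dataframe start with label=False and are flipped when
# their name matches a declared label (SimpleFeature is an illustrative stand-in,
# not an hsfs class).
class SimpleFeature:
    def __init__(self, name, type_):
        self.name = name
        self.type = type_
        self.label = False

features = [SimpleFeature("customer_id", "bigint"), SimpleFeature("churn", "int")]
labels = ["churn"]

for label_name in labels:
    for feature in features:
        if feature.name == label_name:
            feature.label = True

print([(f.name, f.label) for f in features])  # [('customer_id', False), ('churn', True)]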