Ejemplo n.º 1
0
 def parse_schema_training_dataset(self, dataframe):
     """Build the training-dataset feature list from a dataframe schema.

     Each schema field is mapped to a ``TrainingDatasetFeature`` with a
     lower-cased name and the field's Spark ``simpleString()`` type.
     """
     parsed_features = []
     for field in dataframe.schema:
         parsed_features.append(
             training_dataset_feature.TrainingDatasetFeature(
                 field.name.lower(), field.dataType.simpleString()
             )
         )
     return parsed_features
Ejemplo n.º 2
0
 def attach_transformation_fn(
     self, training_dataset_obj=None, feature_view_obj=None
 ):
     """Append output features for every attached transformation function.

     Operates on the training dataset if given, otherwise on the feature
     view. For each (feature_name, transformation_fn) pair a new
     ``TrainingDatasetFeature`` carrying the transformation is appended to
     the target's ``_features``.

     Raises ValueError when a transformation function targets a label
     column, since transforming labels online is unsupported.
     """
     target_obj = (
         training_dataset_obj if training_dataset_obj else feature_view_obj
     )
     if not target_obj._transformation_functions:
         return
     for feature_name, transformation_fn in (
         target_obj._transformation_functions.items()
     ):
         if feature_name in target_obj.labels:
             raise ValueError(
                 "Online transformations for training dataset labels are not supported."
             )
         target_obj._features.append(
             training_dataset_feature.TrainingDatasetFeature(
                 name=feature_name,
                 feature_group_feature_name=feature_name,
                 type=transformation_fn.output_type,
                 label=False,
                 transformation_function=transformation_fn,
             )
         )
Ejemplo n.º 3
0
 def save(self, feature_view_obj):
     """Create the feature view on the backend and return the updated object.

     Label columns are added to ``_features`` as label-flagged
     ``TrainingDatasetFeature``s, transformation functions are attached,
     and the feature view is POSTed to the API.
     """
     if feature_view_obj.labels:
         # BUG FIX: the original called list.append() on the whole list
         # comprehension, nesting a list inside _features. extend() adds
         # the label features individually, matching the flat feature
         # list the sibling training-dataset save() builds.
         feature_view_obj._features.extend([
             training_dataset_feature.TrainingDatasetFeature(
                 name=label_name, label=True)
             for label_name in feature_view_obj.labels
         ])
     self._transformation_function_engine.attach_transformation_fn(
         feature_view_obj)
     updated_fv = self._feature_view_api.post(feature_view_obj)
     print("Feature view created successfully, explore it at \n" +
           self._get_feature_view_url(updated_fv))
     return updated_fv
Ejemplo n.º 4
0
    def save(self, training_dataset, features, user_write_options):
        """Register a training dataset and kick off the materialization job.

        When ``features`` is a Query, the label columns become the feature
        list and transformation functions are attached. When it is a
        dataframe, the schema is parsed into features, labels are flagged,
        and transformation functions are rejected (they require a Query).
        A missing ``train_split`` is defaulted to "train" with a warning.

        Returns the updated training dataset instance and the write job.
        """
        if isinstance(features, query.Query):
            # Query source: keep the query DTO, derive label features.
            training_dataset._querydto = features
            label_features = []
            for label_name in training_dataset.label:
                label_features.append(
                    training_dataset_feature.TrainingDatasetFeature(
                        name=label_name, label=True)
                )
            training_dataset._features = label_features
            self._transformation_function_engine.attach_transformation_fn(
                training_dataset)
        else:
            # Dataframe source: normalize, then infer features from schema.
            features = engine.get_instance().convert_to_default_dataframe(
                features)
            training_dataset._features = (
                engine.get_instance().parse_schema_training_dataset(features))
            label_names = set(training_dataset.label)
            for feature in training_dataset._features:
                if feature.name in label_names:
                    feature.label = True

            # Transformation functions only work with Query objects, so a
            # dataframe source with transformation functions is an error.
            if training_dataset.transformation_functions:
                raise ValueError(
                    "Transformation functions can only be applied to training datasets generated from Query object"
                )

        if training_dataset.splits and training_dataset.train_split is None:
            training_dataset.train_split = "train"
            warnings.warn(
                "Training dataset splits were defined but no `train_split` (the name of the split that is going to be "
                "used for training) was provided. Setting this property to `train`. The statistics of this "
                "split will be used for transformation functions.")

        updated_instance = self._training_dataset_api.post(training_dataset)
        td_job = engine.get_instance().write_training_dataset(
            training_dataset, features, user_write_options, self.OVERWRITE)
        return updated_instance, td_job
    def save(self, training_dataset, features, user_write_options):
        """Register a training dataset and write its data with overwrite.

        A Query source keeps the query DTO and turns the label columns
        into label-flagged features; a dataframe source is normalized,
        its schema parsed into features, and matching labels flagged.
        The metadata is POSTed before the write is launched.
        """
        if isinstance(features, query.Query):
            # Query source: label columns become the feature list.
            training_dataset._querydto = features
            training_dataset._features = [
                training_dataset_feature.TrainingDatasetFeature(
                    name=name, label=True
                )
                for name in training_dataset.label
            ]
        else:
            # Dataframe source: normalize it and infer the schema.
            engine_instance = engine.get_instance()
            features = engine_instance.convert_to_default_dataframe(features)
            training_dataset._features = (
                engine_instance.parse_schema_training_dataset(features)
            )
            label_names = set(training_dataset.label)
            for feature in training_dataset._features:
                if feature.name in label_names:
                    feature.label = True

        # Register metadata first, then materialize the data.
        self._training_dataset_api.post(training_dataset)

        engine.get_instance().write_training_dataset(
            training_dataset, features, user_write_options, self.OVERWRITE
        )