def update_row(cls, current, past, *args, **kwargs):
    """Feed the past and current path/request totals to ``update_ratio``.

    The arguments handed to ``update_ratio`` are, in order: past unique-path
    total, past request total, current unique-path total, current request
    total.

    :param current: mapping of feature name -> value for the current window;
        both totals are read by subscript, so they must be present.
    :param past: mapping of feature name -> value for the past window; read
        with ``.get``, so a missing entry is passed on as whatever ``.get``
        yields (``None`` for a plain dict -- TODO confirm the mapping type).
    :param args: accepted but not used here.
    :param kwargs: accepted but not used here.
    :return: whatever ``update_ratio`` returns for those four values.
    """
    unique_path_key = FeatureUniquePathTotal.feature_name_from_class()
    request_total_key = FeatureRequestTotal.feature_name_from_class()

    # Past values are looked up before current ones, as positional order
    # of the ``update_ratio`` call requires.
    past_unique_paths = past.get(unique_path_key)
    past_requests = past.get(request_total_key)
    current_unique_paths = current[unique_path_key]
    current_requests = current[request_total_key]

    return update_ratio(
        past_unique_paths,
        past_requests,
        current_unique_paths,
        current_requests,
    )
def update_row(cls, current, past, *args, **kwargs):
    """Feed this feature's past/current values and request totals to ``update_mean``.

    The arguments handed to ``update_mean`` are, in order: past feature
    value, current feature value, past request total, current request total.

    :param current: mapping of feature name -> value for the current window;
        read by subscript, so both entries must be present.
    :param past: mapping of feature name -> value for the past window; read
        with ``.get``, so a missing entry is passed on as whatever ``.get``
        yields (``None`` for a plain dict -- TODO confirm the mapping type).
    :param args: accepted but not used here.
    :param kwargs: accepted but not used here.
    :return: whatever ``update_mean`` returns for those four values.
    """
    own_key = cls.feature_name_from_class()
    past_value = past.get(own_key)
    current_value = current[own_key]

    request_total_key = FeatureRequestTotal.feature_name_from_class()
    past_requests = past.get(request_total_key)
    current_requests = current[request_total_key]

    return update_mean(
        past_value,
        current_value,
        past_requests,
        current_requests,
    )
def test_update(self):
    """Check ``feature.update`` on a single row of past/current feature maps.

    The row carries a past feature value of 2 (request total 1) and a
    current feature value of 6 (request total 3); the updated column is
    expected to be ``0.75 * 6 + 0.25 * 2`` to two decimal places.
    """
    feature = self.feature
    current_col = feature.current_features_column
    past_col = feature.past_features_column
    request_total_key = FeatureRequestTotal.feature_name_from_class()

    # One string -> float map column per window, current first.
    schema = T.StructType([
        T.StructField(column, T.MapType(T.StringType(), T.FloatType()))
        for column in (current_col, past_col)
    ])

    row = {
        current_col: {
            feature.feature_name: 6.,
            request_total_key: 3.,
        },
        past_col: {
            feature.feature_name: 2.,
            request_total_key: 1.,
        },
    }
    sub_df = self.session.createDataFrame([row], schema=schema)

    result_df = feature.update(sub_df)
    result_df.show()

    updated_col = feature.updated_feature_col_name
    first_row = result_df.select(updated_col).collect()[0]
    value = first_row[updated_col]

    expected_value = 0.75 * 6. + 0.25 * 2.
    self.assertAlmostEqual(value, expected_value, places=2)
def update_row(cls, current, past, *args, **kwargs):
    """Feed past/current values, request totals and depth averages to ``update_variance``.

    The arguments handed to ``update_variance`` are, in order: past feature
    value, current feature value, past request total, current request total,
    past path-depth average, current path-depth average.

    :param current: mapping of feature name -> value for the current window;
        read by subscript, so all three entries must be present.
    :param past: mapping of feature name -> value for the past window; read
        with ``.get``, so a missing entry is passed on as whatever ``.get``
        yields (``None`` for a plain dict -- TODO confirm the mapping type).
    :param args: accepted but not used here.
    :param kwargs: accepted but not used here.
    :return: whatever ``update_variance`` returns for those six values.
    """
    own_key = cls.feature_name_from_class()
    past_value = past.get(own_key)
    current_value = current[own_key]

    request_total_key = FeatureRequestTotal.feature_name_from_class()
    past_requests = past.get(request_total_key)
    current_requests = current[request_total_key]

    depth_average_key = FeaturePathDepthAverage.feature_name_from_class()
    past_depth_average = past.get(depth_average_key)
    current_depth_average = current[depth_average_key]

    return update_variance(
        past_value,
        current_value,
        past_requests,
        current_requests,
        past_depth_average,
        current_depth_average,
    )
def test_update(self):
    """Check ``feature.update`` against ``update_variance`` on a single row.

    The row carries past values (feature 2, request total 1, depth average 4)
    and current values (feature 6, request total 3, depth average 5). The
    updated column must match ``update_variance(2, 6, 1, 3, 4, 5)`` to two
    decimal places.
    """
    feature = self.feature
    request_total_key = FeatureRequestTotal.feature_name_from_class()
    depth_average_key = FeaturePathDepthAverage.feature_name_from_class()
    current_col = feature.current_features_column
    past_col = feature.past_features_column

    # One string -> float map column per window, current first.
    schema = T.StructType([
        T.StructField(column, T.MapType(T.StringType(), T.FloatType()))
        for column in (current_col, past_col)
    ])

    row = {
        current_col: {
            feature.feature_name: 6.,
            request_total_key: 3.,
            depth_average_key: 5.,
        },
        past_col: {
            feature.feature_name: 2.,
            request_total_key: 1.,
            depth_average_key: 4.,
        },
    }
    sub_df = self.session.createDataFrame([row], schema=schema)

    result_df = feature.update(sub_df)
    result_df.show()

    updated_col = feature.updated_feature_col_name
    first_row = result_df.select(updated_col).collect()[0]
    value = first_row[updated_col]

    # The reference value comes from the helper itself, called with the same
    # numbers that were placed in the row above.
    from baskerville.features.helpers import update_variance
    expected_value = update_variance(2., 6., 1., 3., 4., 5.)
    print(expected_value)

    self.assertAlmostEqual(value, expected_value, places=2)
def update(self, df, feat_column='features', old_feat_column='old_features'):
    """Delegate to the parent ``update`` with this feature's column names.

    The parent is called positionally with ``df``, this feature's own name,
    the request-total feature name and the request-interval-average feature
    name, in that order.

    :param df: the dataframe forwarded unchanged to the parent ``update``.
    :param feat_column: accepted for signature compatibility; not used in
        this body.
    :param old_feat_column: accepted for signature compatibility; not used
        in this body.
    :return: whatever the parent ``update`` returns.
    """
    parent_update = super().update
    positional = (
        df,
        self.feature_name,
        FeatureRequestTotal.feature_name_from_class(),
        FeatureRequestIntervalAverage.feature_name_from_class(),
    )
    return parent_update(*positional)
def update(self, df, feat_column='features', old_feat_column='old_features'):
    """Delegate to the parent ``update`` with the top-page and request totals.

    The parent is called positionally with ``df``, the top-page-total feature
    name and the request-total feature name, in that order.

    :param df: the dataframe forwarded unchanged to the parent ``update``.
    :param feat_column: accepted for signature compatibility; not used in
        this body.
    :param old_feat_column: accepted for signature compatibility; not used
        in this body.
    :return: whatever the parent ``update`` returns.
    """
    parent_update = super().update
    positional = (
        df,
        FeatureTopPageTotal.feature_name_from_class(),
        FeatureRequestTotal.feature_name_from_class(),
    )
    return parent_update(*positional)
def update(self, df):
    """Delegate to the parent ``update`` with request total over minutes total.

    The parent receives ``df`` positionally, plus ``numerator`` set to the
    request-total feature name and ``denominator`` set to the minutes-total
    feature name.

    :param df: the dataframe forwarded unchanged to the parent ``update``.
    :return: whatever the parent ``update`` returns.
    """
    parent_update = super().update
    ratio_columns = {
        'numerator': FeatureRequestTotal.feature_name_from_class(),
        'denominator': FeatureMinutesTotal.feature_name_from_class(),
    }
    return parent_update(df, **ratio_columns)