Esempio n. 1
0
 def update_row(cls, current, past, *args, **kwargs):
     """
     Feed the past and current values of the features named by
     FeatureUniquePathTotal and FeatureRequestTotal to ``update_ratio``.

     :param current: dict-like of feature name -> value; both features
         are read by direct indexing, so a missing one raises
     :param past: dict-like of feature name -> value; read with ``get``,
         so a missing feature is passed on as None
     :param args: accepted but not used
     :param kwargs: accepted but not used
     :return: the result of ``update_ratio``
     """
     path_key = FeatureUniquePathTotal.feature_name_from_class()
     total_key = FeatureRequestTotal.feature_name_from_class()

     # Past values first, then current ones: the argument order
     # expected by update_ratio at this call site.
     past_paths = past.get(path_key)
     past_total = past.get(total_key)
     current_paths = current[path_key]
     current_total = current[total_key]

     return update_ratio(
         past_paths, past_total, current_paths, current_total
     )
 def update_row(cls, current, past, *args, **kwargs):
     """
     Feed the past and current values of this feature, together with the
     past and current values of the feature named by FeatureRequestTotal,
     to ``update_mean``.

     :param current: dict-like of feature name -> value; read by direct
         indexing, so a missing feature raises
     :param past: dict-like of feature name -> value; read with ``get``,
         so a missing feature is passed on as None
     :param args: accepted but not used
     :param kwargs: accepted but not used
     :return: the result of ``update_mean``
     """
     own_key = cls.feature_name_from_class()
     total_key = FeatureRequestTotal.feature_name_from_class()

     past_value = past.get(own_key)
     current_value = current[own_key]
     past_total = past.get(total_key)
     current_total = current[total_key]

     return update_mean(
         past_value, current_value, past_total, current_total
     )
Esempio n. 3
0
    def test_update(self):
        """
        Run ``update`` on a one-row dataframe holding a current and a past
        feature map and compare the updated value with
        ``0.75 * 6 + 0.25 * 2`` (the weights match the 3:1 split of the
        request totals supplied in the two maps).
        """
        feature = self.feature
        current_col = feature.current_features_column
        past_col = feature.past_features_column

        # Both columns are string -> float maps.
        schema = T.StructType([
            T.StructField(column, T.MapType(T.StringType(), T.FloatType()))
            for column in (current_col, past_col)
        ])

        row = {
            current_col: {
                feature.feature_name: 6.,
                FeatureRequestTotal.feature_name_from_class(): 3.,
            },
            past_col: {
                feature.feature_name: 2.,
                FeatureRequestTotal.feature_name_from_class(): 1.,
            },
        }
        sub_df = self.session.createDataFrame([row], schema=schema)
        result_df = feature.update(sub_df)

        result_df.show()
        updated_col = feature.updated_feature_col_name
        collected = result_df.select(updated_col).collect()
        value = collected[0][updated_col]

        expected_value = 0.75 * 6. + 0.25 * 2.
        self.assertAlmostEqual(value, expected_value, places=2)
Esempio n. 4
0
 def update_row(cls, current, past, *args, **kwargs):
     """
     Feed the past and current values of three features to
     ``update_variance``: this feature, the one named by
     FeatureRequestTotal and the one named by FeaturePathDepthAverage,
     in that order and always as a (past, current) pair.

     :param current: dict-like of feature name -> value; read by direct
         indexing, so a missing feature raises
     :param past: dict-like of feature name -> value; read with ``get``,
         so a missing feature is passed on as None
     :param args: accepted but not used
     :param kwargs: accepted but not used
     :return: the result of ``update_variance``
     """
     own_key = cls.feature_name_from_class()
     count_key = FeatureRequestTotal.feature_name_from_class()
     mean_key = FeaturePathDepthAverage.feature_name_from_class()

     return update_variance(
         past.get(own_key),
         current[own_key],
         past.get(count_key),
         current[count_key],
         past.get(mean_key),
         current[mean_key],
     )
Esempio n. 5
0
    def test_update(self):
        """
        Run ``update`` on a one-row dataframe holding a current and a past
        feature map and compare the updated value with what
        ``update_variance`` returns for the same six numbers.
        """
        feature = self.feature
        count_col = FeatureRequestTotal.feature_name_from_class()
        mean_col = FeaturePathDepthAverage.feature_name_from_class()
        current_col = feature.current_features_column
        past_col = feature.past_features_column

        # Both columns are string -> float maps.
        schema = T.StructType([
            T.StructField(column, T.MapType(T.StringType(), T.FloatType()))
            for column in (current_col, past_col)
        ])

        row = {
            current_col: {
                feature.feature_name: 6.,
                count_col: 3.,
                mean_col: 5.,
            },
            past_col: {
                feature.feature_name: 2.,
                count_col: 1.,
                mean_col: 4.,
            },
        }
        sub_df = self.session.createDataFrame([row], schema=schema)
        result_df = feature.update(sub_df)

        result_df.show()
        updated_col = feature.updated_feature_col_name
        collected = result_df.select(updated_col).collect()
        value = collected[0][updated_col]

        from baskerville.features.helpers import update_variance
        # Arguments mirror the row above: past/current feature value,
        # past/current count, past/current mean.
        expected_value = update_variance(2., 6., 1., 3., 4., 5.)
        print(expected_value)
        self.assertAlmostEqual(value, expected_value, places=2)
Esempio n. 6
0
 def update(self,
            df,
            feat_column='features',
            old_feat_column='old_features'):
     """
     Delegate to the parent ``update`` with this feature's name and the
     names of the FeatureRequestTotal and FeatureRequestIntervalAverage
     features.

     :param df: the dataframe handed to the parent ``update``
     :param feat_column: accepted but not forwarded to the parent
     :param old_feat_column: accepted but not forwarded to the parent
     :return: whatever the parent ``update`` returns
     """
     total_name = FeatureRequestTotal.feature_name_from_class()
     interval_name = FeatureRequestIntervalAverage.feature_name_from_class()
     return super().update(df, self.feature_name, total_name, interval_name)
Esempio n. 7
0
 def update(self,
            df,
            feat_column='features',
            old_feat_column='old_features'):
     """
     Delegate to the parent ``update`` with the names of the
     FeatureTopPageTotal and FeatureRequestTotal features.

     :param df: the dataframe handed to the parent ``update``
     :param feat_column: accepted but not forwarded to the parent
     :param old_feat_column: accepted but not forwarded to the parent
     :return: whatever the parent ``update`` returns
     """
     top_page_name = FeatureTopPageTotal.feature_name_from_class()
     total_name = FeatureRequestTotal.feature_name_from_class()
     return super().update(df, top_page_name, total_name)
 def update(self, df):
     """
     Delegate to the parent ``update``, naming the FeatureRequestTotal
     feature as the numerator and the FeatureMinutesTotal feature as the
     denominator.

     :param df: the dataframe handed to the parent ``update``
     :return: whatever the parent ``update`` returns
     """
     requests_name = FeatureRequestTotal.feature_name_from_class()
     minutes_name = FeatureMinutesTotal.feature_name_from_class()
     return super().update(
         df, numerator=requests_name, denominator=minutes_name
     )