def _pandas_value_ratio(
    self,
    batches: Dict[str, Batch],
    execution_engine: PandasExecutionEngine,
    metric_domain_kwargs: Dict,
    metric_value_kwargs: Dict,
    metrics: Dict,
    runtime_configuration: dict = None,
):
    """Return the occurrence count of a requested value divided by a metric count.

    The numerator is the number of times ``metric_value_kwargs["value"]``
    occurs in the domain data (0 when it never occurs).  The denominator is
    read from the previously computed metric stored under
    ``"column_values.nonnull.unexpected_count"``.

    Args:
        batches: Batches forwarded to ``execution_engine.get_domain_dataframe``.
        execution_engine: Engine used to fetch the domain data.
        metric_domain_kwargs: Domain selection, forwarded to the engine and to
            ``get_domain_metrics_dict_by_name``.
        metric_value_kwargs: Must contain the key ``"value"``.
        metrics: Already-resolved metrics from which the denominator is read.
        runtime_configuration: Unused by this function.

    Returns:
        ``value_count / nonnull_count``.  No guard is applied for a zero
        denominator.

    Raises:
        KeyError: If ``"value"`` is missing from ``metric_value_kwargs`` or the
            denominator metric is missing from the domain metrics lookup.
    """
    # Column extraction
    # NOTE(review): the object is treated as a Series below although it comes
    # from get_domain_dataframe -- confirm what the engine returns here.
    series = execution_engine.get_domain_dataframe(
        domain_kwargs=metric_domain_kwargs, batches=batches
    )
    domain_metrics_lookup = get_domain_metrics_dict_by_name(
        metrics=metrics, metric_domain_kwargs=metric_domain_kwargs
    )
    # NOTE(review): this reads the *unexpected* count of the nonnull metric;
    # confirm that this is really the intended non-null denominator.
    nonnull_count = domain_metrics_lookup["column_values.nonnull.unexpected_count"]
    wanted_value = metric_value_kwargs["value"]

    # Tally the values once and reuse the result for both the membership test
    # and the lookup (``in`` on the tally checks its index, i.e. the distinct
    # values that were seen).
    counts = series.value_counts()
    value_count = counts[wanted_value] if wanted_value in counts else 0

    return value_count / nonnull_count
def _pandas_columns(
    self,
    batches: Dict[str, Batch],
    execution_engine: PandasExecutionEngine,
    metric_domain_kwargs: Dict,
    metric_value_kwargs: Dict,
    metrics: Dict,
    runtime_configuration: dict = None,
):
    """List the column labels of the domain dataframe.

    Args:
        batches: Batches forwarded to ``execution_engine.get_domain_dataframe``.
        execution_engine: Engine used to fetch the domain dataframe.
        metric_domain_kwargs: Domain selection forwarded to the engine.
        metric_value_kwargs: Unused by this function.
        metrics: Unused by this function.
        runtime_configuration: Unused by this function.

    Returns:
        The dataframe's ``columns`` converted to a plain list.
    """
    domain_frame = execution_engine.get_domain_dataframe(
        batches=batches, domain_kwargs=metric_domain_kwargs
    )
    return domain_frame.columns.tolist()
def _pandas_column_a_greater_than_b(
    self,
    batches: Dict[str, Batch],
    execution_engine: PandasExecutionEngine,
    metric_domain_kwargs: Dict,
    metric_value_kwargs: Dict,
    metrics: Dict,
    runtime_configuration: dict = None,
):
    """Compare two columns row by row: is column A greater than column B?

    Args:
        batches: Batches forwarded to ``execution_engine.get_domain_dataframe``.
        execution_engine: Engine used to fetch the domain dataframe.
        metric_domain_kwargs: Domain selection forwarded to the engine.
        metric_value_kwargs: Must contain ``"column_A"`` and ``"column_B"``
            (labels of the columns to compare).  Optional keys, all treated as
            falsy when absent:

            * ``"allow_cross_type_comparisons"`` -- convert every value of
              both columns with ``str`` before comparing.
            * ``"parse_strings_as_datetimes"`` -- map every value of both
              columns through ``parse`` before comparing.
            * ``"or_equal"`` -- use ``>=`` instead of ``>``.
        metrics: Unused by this function.
        runtime_configuration: Unused by this function.

    Returns:
        The element-wise result of ``column_A > column_B`` (or ``>=`` when
        ``or_equal`` is truthy).

    Raises:
        KeyError: If ``"column_A"`` or ``"column_B"`` is missing from
            ``metric_value_kwargs``.
    """
    df = execution_engine.get_domain_dataframe(
        domain_kwargs=metric_domain_kwargs, batches=batches
    )

    column_A = df[metric_value_kwargs["column_A"]]
    column_B = df[metric_value_kwargs["column_B"]]

    # Look the optional flags up by their *string* keys.  Testing the
    # (None-valued) local variables for membership instead would never match,
    # so the flags would silently be ignored.
    allow_cross_type_comparisons = metric_value_kwargs.get(
        "allow_cross_type_comparisons"
    )
    parse_strings_as_datetimes = metric_value_kwargs.get(
        "parse_strings_as_datetimes"
    )
    or_equal = metric_value_kwargs.get("or_equal")

    if allow_cross_type_comparisons:
        # Stringify both sides so values of different types can be compared.
        column_A = column_A.apply(str)
        column_B = column_B.apply(str)

    if parse_strings_as_datetimes:
        # NOTE(review): ``parse`` comes from the enclosing module; presumably a
        # date/time string parser -- confirm against the file's imports.
        column_A = column_A.map(parse)
        column_B = column_B.map(parse)

    if or_equal:
        return column_A >= column_B
    return column_A > column_B
def _pandas_equal_columns(
    self,
    batches: Dict[str, Batch],
    execution_engine: PandasExecutionEngine,
    metric_domain_kwargs: Dict,
    metric_value_kwargs: Dict,
    metrics: Dict,
    runtime_configuration: dict = None,
):
    """Report whether columns A and B hold equal values in at least one row.

    Args:
        batches: Batches forwarded to ``execution_engine.get_domain_dataframe``.
        execution_engine: Engine used to fetch the domain dataframe.
        metric_domain_kwargs: Domain selection forwarded to the engine.
        metric_value_kwargs: Must contain ``"column_A"`` and ``"column_B"``
            (labels of the columns to compare).
        metrics: Unused by this function.
        runtime_configuration: Unused by this function.

    Returns:
        The ``.any()`` reduction of the element-wise ``column_A == column_B``
        comparison.

    Raises:
        KeyError: If ``"column_A"`` or ``"column_B"`` is missing from
            ``metric_value_kwargs``.
    """
    frame = execution_engine.get_domain_dataframe(
        batches=batches, domain_kwargs=metric_domain_kwargs
    )
    left = frame[metric_value_kwargs["column_A"]]
    right = frame[metric_value_kwargs["column_B"]]

    # NOTE(review): ``any`` means a single matching row is enough; confirm
    # whether "equal columns" was meant to require every row (``all``).
    row_matches = left == right
    return row_matches.any()