Beispiel #1
0
    def _pandas_value_ratio(
        self,
        batches: Dict[str, Batch],
        execution_engine: PandasExecutionEngine,
        metric_domain_kwargs: Dict,
        metric_value_kwargs: Dict,
        metrics: Dict,
        runtime_configuration: dict = None,
    ):
        """Return how often a wanted value occurs, divided by a precomputed count.

        The domain data is fetched from ``execution_engine``, the number of
        entries equal to ``metric_value_kwargs["value"]`` is read from its
        ``value_counts()`` (0 when the value does not occur), and that number
        is divided by the ``"column_values.nonnull.unexpected_count"`` entry of
        the metrics resolved for the same domain.

        Args:
            batches: Forwarded to ``execution_engine.get_domain_dataframe``.
            execution_engine: Engine used to obtain the domain data.
            metric_domain_kwargs: Domain selection, forwarded both to the
                engine and to the metrics lookup.
            metric_value_kwargs: Must contain the key ``"value"``.
            metrics: Previously resolved metrics, forwarded to
                ``get_domain_metrics_dict_by_name``.
            runtime_configuration: Not used by this method.

        Returns:
            ``value_count / nonnull_count``.

        Raises:
            KeyError: If ``metric_value_kwargs`` has no ``"value"`` entry.
        """
        # Column Extraction
        series = execution_engine.get_domain_dataframe(
            domain_kwargs=metric_domain_kwargs, batches=batches)

        domain_metrics_lookup = get_domain_metrics_dict_by_name(
            metrics=metrics, metric_domain_kwargs=metric_domain_kwargs)
        # NOTE(review): the key reads like "number of values that failed the
        # non-null check"; confirm this is really the intended denominator.
        nonnull_count = domain_metrics_lookup[
            "column_values.nonnull.unexpected_count"]

        wanted_value = metric_value_kwargs["value"]

        # Tally the values once; the membership test and the lookup below both
        # reuse this result instead of recounting the data.
        counts = series.value_counts()

        # Membership is checked against the tally's index, so an absent value
        # yields 0 instead of a failed lookup.
        if wanted_value in counts:
            value_count = counts[wanted_value]
        else:
            value_count = 0

        # NOTE(review): no guard for a zero denominator here; confirm callers
        # never reach this with nonnull_count == 0.
        return value_count / nonnull_count
    def _pandas_columns(
        self,
        batches: Dict[str, Batch],
        execution_engine: PandasExecutionEngine,
        metric_domain_kwargs: Dict,
        metric_value_kwargs: Dict,
        metrics: Dict,
        runtime_configuration: dict = None,
    ):
        """Return the column labels of the domain dataframe as a list.

        Args:
            batches: Forwarded to ``execution_engine.get_domain_dataframe``.
            execution_engine: Engine used to obtain the domain dataframe.
            metric_domain_kwargs: Domain selection forwarded to the engine.
            metric_value_kwargs: Not used by this method.
            metrics: Not used by this method.
            runtime_configuration: Not used by this method.

        Returns:
            The result of ``.columns.tolist()`` on the domain dataframe.
        """
        domain_frame = execution_engine.get_domain_dataframe(
            batches=batches,
            domain_kwargs=metric_domain_kwargs,
        )
        return domain_frame.columns.tolist()
    def _pandas_column_a_greater_than_b(
        self,
        batches: Dict[str, Batch],
        execution_engine: PandasExecutionEngine,
        metric_domain_kwargs: Dict,
        metric_value_kwargs: Dict,
        metrics: Dict,
        runtime_configuration: dict = None,
    ):
        """Compare two columns of the domain dataframe element by element.

        Args:
            batches: Forwarded to ``execution_engine.get_domain_dataframe``.
            execution_engine: Engine used to obtain the domain dataframe.
            metric_domain_kwargs: Domain selection forwarded to the engine.
            metric_value_kwargs: Must contain ``"column_A"`` and
                ``"column_B"`` (the column labels to compare). Optional keys,
                each treated as a truthy/falsy flag:

                * ``"allow_cross_type_comparisons"`` - convert every value of
                  both columns with ``str`` before comparing.
                * ``"parse_strings_as_datetimes"`` - map every value of both
                  columns through ``parse`` before comparing.
                * ``"or_equal"`` - compare with ``>=`` instead of ``>``.
            metrics: Not used by this method.
            runtime_configuration: Not used by this method.

        Returns:
            The result of ``column_A >= column_B`` when ``or_equal`` is
            truthy, otherwise of ``column_A > column_B``.

        Raises:
            KeyError: If ``"column_A"`` or ``"column_B"`` is missing from
                ``metric_value_kwargs``.
        """
        df = execution_engine.get_domain_dataframe(
            domain_kwargs=metric_domain_kwargs, batches=batches)

        column_A = df[metric_value_kwargs["column_A"]]
        column_B = df[metric_value_kwargs["column_B"]]

        # Optional flags are looked up by their string keys; a missing key
        # yields None, which is falsy and leaves the option switched off.
        # (Previously the lookup tested `None in metric_value_kwargs`, so the
        # flags were never picked up.)
        allow_cross_type_comparisons = metric_value_kwargs.get(
            "allow_cross_type_comparisons")
        parse_strings_as_datetimes = metric_value_kwargs.get(
            "parse_strings_as_datetimes")
        or_equal = metric_value_kwargs.get("or_equal")

        if allow_cross_type_comparisons:
            # Stringify both sides so mixed-type values can be ordered.
            column_A = column_A.apply(str)
            column_B = column_B.apply(str)

        if parse_strings_as_datetimes:
            # `parse` comes from the enclosing module
            # (presumably dateutil.parser.parse - TODO confirm).
            temp_column_A = column_A.map(parse)
            temp_column_B = column_B.map(parse)
        else:
            temp_column_A = column_A
            temp_column_B = column_B

        if or_equal:
            return temp_column_A >= temp_column_B
        return temp_column_A > temp_column_B
Beispiel #4
0
    def _pandas_equal_columns(
        self,
        batches: Dict[str, Batch],
        execution_engine: PandasExecutionEngine,
        metric_domain_kwargs: Dict,
        metric_value_kwargs: Dict,
        metrics: Dict,
        runtime_configuration: dict = None,
    ):
        """Report whether two columns of the domain dataframe match anywhere.

        Args:
            batches: Forwarded to ``execution_engine.get_domain_dataframe``.
            execution_engine: Engine used to obtain the domain dataframe.
            metric_domain_kwargs: Domain selection forwarded to the engine.
            metric_value_kwargs: Must contain ``"column_A"`` and
                ``"column_B"``, the labels of the columns to compare.
            metrics: Not used by this method.
            runtime_configuration: Not used by this method.

        Returns:
            ``.any()`` of the elementwise ``==`` comparison of the two
            columns, i.e. truthy as soon as one position compares equal.
        """
        frame = execution_engine.get_domain_dataframe(
            batches=batches,
            domain_kwargs=metric_domain_kwargs,
        )
        left = frame[metric_value_kwargs["column_A"]]
        right = frame[metric_value_kwargs["column_B"]]

        # NOTE(review): the method name suggests "columns are equal", which
        # would be `.all()`; `.any()` is kept as found - confirm intent.
        matches = left == right
        return matches.any()