def execute_inspections(op_id, caller_filename, lineno,
                                optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched ``add_constant`` call and register its DAG node.

            ``args``, ``kwargs`` and ``original`` are free variables supplied by the enclosing
            scope; ``args[0]`` is used as the data that is handed to the original function.

            :param op_id: id given to the new DAG node
            :param caller_filename: file of the call that is being inspected
            :param lineno: line of the call that is being inspected
            :param optional_code_reference: code reference of the call, forwarded as is
            :param optional_source_code: source code of the call, forwarded as is
            :return: the result data of the backend result produced for the call
            """
            function_info = FunctionInfo('statsmodel.api', 'add_constant')
            operator_context = OperatorContext(OperatorType.PROJECTION_MODIFY,
                                               function_info)

            data_info = get_input_info(args[0], caller_filename, lineno,
                                       function_info, optional_code_reference,
                                       optional_source_code)

            prepared_inputs = SklearnBackend.before_call(
                operator_context, [data_info.annotated_dfobject])
            # The original is called with the backend-prepared data instead of args[0].
            raw_result = original(prepared_inputs[0].result_data, *args[1:], **kwargs)
            backend_result = SklearnBackend.after_call(operator_context,
                                                       prepared_inputs, raw_result)
            annotated_result = backend_result.annotated_dfobject.result_data

            location = BasicCodeLocation(caller_filename, lineno)
            details = DagNodeDetails("Adds const column", ["array"])
            code_info = get_optional_code_info_or_none(optional_code_reference,
                                                       optional_source_code)
            add_dag_node(DagNode(op_id, location, operator_context, details, code_info),
                         [data_info.dag_node], backend_result)

            return annotated_result
def get_expected_check_result_simple_imputer():
    """
    Build the expected result for the code snippet in test_no_bias_introduced_for_simple_imputer.

    The expectation is a successful SimilarRemovalProbabilitiesFor(['A']) result that maps the
    SimpleImputer DAG node to the removal probabilities of column 'A'.
    """
    imputer_node = DagNode(
        1,
        BasicCodeLocation('<string-source>', 6),
        OperatorContext(OperatorType.TRANSFORMER,
                        FunctionInfo('sklearn.impute._base', 'SimpleImputer')),
        DagNodeDetails('Simple Imputer: fit_transform', ['A']),
        OptionalCodeInfo(CodeReference(6, 10, 6, 72),
                         "SimpleImputer(missing_values=np.nan, strategy='most_frequent')"))

    # One row per value of the sensitive column; the NaN row is the one that disappears.
    removal_stats = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_c', math.nan],
        'count_before': [2, 1, 1],
        'count_after': [3, 1, 0],
        'removed_records': [-1, 0, 1],
        'removal_probability': [0., 0., 1.],
        'normalized_removal_probability': [0., 0., 1.],
    })
    probabilities = RemovalProbabilities(imputer_node, True, 0., removal_stats)

    return SimilarRemovalProbabilitiesForResult(
        SimilarRemovalProbabilitiesFor(['A']),
        CheckStatus.SUCCESS,
        None,  # a successful check carries no failure message
        {imputer_node: {'A': probabilities}})
def get_expected_check_result_merge():
    """
    Build the expected result for the code snippet in test_no_bias_introduced_for_merge.

    The expectation is a successful SimilarRemovalProbabilitiesFor(['A']) result that maps the
    merge DAG node to the removal probabilities of column 'A'.
    """
    merge_node = DagNode(
        2,
        BasicCodeLocation('<string-source>', 5),
        OperatorContext(OperatorType.JOIN,
                        FunctionInfo('pandas.core.frame', 'merge')),
        DagNodeDetails("on 'B'", ['A', 'B', 'C']),
        OptionalCodeInfo(CodeReference(5, 12, 5, 36), "df_a.merge(df_b, on='B')"))

    # One row per value of the sensitive column; only 'cat_b' loses a record in the join.
    removal_stats = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_b', 'cat_c'],
        'count_before': [2, 2, 1],
        'count_after': [2, 1, 1],
        'removed_records': [0, 1, 0],
        'removal_probability': [0., 0.5, 0.],
        'normalized_removal_probability': [0., 1., 0.],
    })
    probabilities = RemovalProbabilities(merge_node, True, 1., removal_stats)

    return SimilarRemovalProbabilitiesForResult(
        SimilarRemovalProbabilitiesFor(['A']),
        CheckStatus.SUCCESS,
        None,  # a successful check carries no failure message
        {merge_node: {'A': probabilities}})
Example #4
0
def get_expected_check_result_simple_imputer():
    """
    Build the expected result for the code snippet in test_no_bias_introduced_for_simple_imputer.

    The expectation is a successful NoBiasIntroducedFor(['A']) result that maps the SimpleImputer
    DAG node to the distribution change of column 'A'.
    """
    node = DagNode(
        1,
        BasicCodeLocation('<string-source>', 6),
        OperatorContext(OperatorType.TRANSFORMER,
                        FunctionInfo('sklearn.impute._base', 'SimpleImputer')),
        DagNodeDetails('Simple Imputer', ['A']),
        OptionalCodeInfo(CodeReference(6, 10, 6, 72),
                         "SimpleImputer(missing_values=np.nan, strategy='most_frequent')"))

    # Counts and ratios of every value of column 'A' before and after the operator.
    distribution_df = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_c', math.nan],
        'count_before': [2, 1, 1],
        'count_after': [3, 1, 0],
        'ratio_before': [0.5, 0.25, 0.25],
        'ratio_after': [0.75, 0.25, 0.],
        'relative_ratio_change': [0.5, 0., -1.],
    })
    distribution_change = BiasDistributionChange(node, True, 0., distribution_df)

    return NoBiasIntroducedForResult(
        NoBiasIntroducedFor(['A']),
        CheckStatus.SUCCESS,
        None,  # a successful check carries no failure message
        {node: {'A': distribution_change}})
Example #5
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched groupby ``agg`` call and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope. The parent DAG node is looked up via ``self._mlinspect_dag_node``.

            :raises NotImplementedError: if ``self`` has no ``_mlinspect_dag_node`` attribute
            :return: the result data of the backend result produced for the aggregation
            """
            function_info = FunctionInfo('pandas.core.groupby.generic', 'agg')
            if not hasattr(self, '_mlinspect_dag_node'):
                raise NotImplementedError("TODO: Support agg if groupby happened in external code")
            parent_node = get_dag_node_for_id(self._mlinspect_dag_node)  # pylint: disable=no-member
            operator_context = OperatorContext(OperatorType.GROUP_BY_AGG, function_info)

            prepared_inputs = PandasBackend.before_call(operator_context, [])
            aggregated = original(self, *args, **kwargs)
            backend_result = PandasBackend.after_call(operator_context, prepared_inputs, aggregated)

            # The aggregation is described by the positional arguments when there are any,
            # otherwise by the keyword arguments.
            group_key = aggregated.index.name
            aggregation_spec = args if len(args) > 0 else kwargs
            description = "Groupby '{}', Aggregate: '{}'".format(group_key, aggregation_spec)
            columns = [group_key] + list(aggregated.columns)

            location = BasicCodeLocation(caller_filename, lineno)
            code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
            dag_node = DagNode(op_id, location, operator_context, DagNodeDetails(description, columns), code_info)
            add_dag_node(dag_node, [parent_node], backend_result)

            return backend_result.annotated_dfobject.result_data
Example #6
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched DataFrame ``replace`` call and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope. The value to replace and its replacement are read from the positional
            arguments and, if they are not given positionally, from the ``to_replace`` / ``value``
            keyword arguments.

            :param op_id: id given to the new DAG node
            :param caller_filename: file of the call that is being inspected
            :param lineno: line of the call that is being inspected
            :param optional_code_reference: code reference of the call, forwarded as is
            :param optional_source_code: source code of the call, forwarded as is
            :raises NotImplementedError: if the value to replace is a dict
            :return: the result data of the backend result produced for the call
            """
            function_info = FunctionInfo('pandas.core.frame', 'replace')

            # Resolve and validate the arguments before anything is executed, so that an unsupported
            # call fails without having run the replace or the backend.
            to_replace = args[0] if len(args) > 0 else kwargs.get('to_replace')
            if isinstance(to_replace, dict):
                raise NotImplementedError("TODO: Add support for replace with dicts")
            value = args[1] if len(args) > 1 else kwargs.get('value')

            input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                        optional_source_code)
            operator_context = OperatorContext(OperatorType.PROJECTION_MODIFY, function_info)
            input_infos = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
            # NOTE(review): no copy of input_infos is taken here; that relies on replace returning a
            # new frame instead of modifying its input - confirm that inplace=True is never used.
            result = original(input_infos[0].result_data, *args, **kwargs)
            backend_result = PandasBackend.after_call(operator_context,
                                                      input_infos,
                                                      result)
            result = backend_result.annotated_dfobject.result_data
            description = "Replace '{}' with '{}'".format(to_replace, value)
            dag_node = DagNode(op_id,
                               BasicCodeLocation(caller_filename, lineno),
                               operator_context,
                               DagNodeDetails(description, list(result.columns)),
                               get_optional_code_info_or_none(optional_code_reference, optional_source_code))
            add_dag_node(dag_node, [input_info.dag_node], backend_result)

            return result
Example #7
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched DataFrame ``merge`` call and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope; ``self`` is the left input and ``args[0]`` the right input.

            The node description names the join key: ``on '<key>'`` when ``on`` is given (as keyword
            or as third positional argument), ``on <left> == <right>`` when both sides are given via
            ``left_on``/``left_index`` and ``right_on``/``right_index``, and ``None`` otherwise.

            :param op_id: id given to the new DAG node
            :param caller_filename: file of the call that is being inspected
            :param lineno: line of the call that is being inspected
            :param optional_code_reference: code reference of the call, forwarded as is
            :param optional_source_code: source code of the call, forwarded as is
            :return: the result data of the backend result produced for the merge
            """
            function_info = FunctionInfo('pandas.core.frame', 'merge')

            def describe_side(side):
                """ Name the join key of one side: its ``<side>_on`` value or its index flag """
                key = kwargs.get('{}_on'.format(side))
                if key is not None:
                    return "'{}'".format(key)
                if kwargs.get('{}_index'.format(side)):
                    return '{}_index'.format(side)
                return None

            input_info_a = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                          optional_source_code)
            input_info_b = get_input_info(args[0], caller_filename, lineno, function_info, optional_code_reference,
                                          optional_source_code)
            operator_context = OperatorContext(OperatorType.JOIN, function_info)
            input_infos = PandasBackend.before_call(operator_context, [input_info_a.annotated_dfobject,
                                                                       input_info_b.annotated_dfobject])
            # NOTE(review): no copy of input_infos is taken here; that relies on merge returning a
            # new frame instead of modifying its inputs.
            # args[0] (the right input) is replaced by the backend-prepared data, hence args[1:].
            result = original(input_infos[0].result_data, input_infos[1].result_data, *args[1:], **kwargs)
            backend_result = PandasBackend.after_call(operator_context,
                                                      input_infos,
                                                      result)
            result = backend_result.annotated_dfobject.result_data

            # Do not assume that the `on` keyword is present: the key may be passed positionally
            # (right, how, on), per side, or not at all.
            if 'on' in kwargs:
                description = "on '{}'".format(kwargs['on'])
            elif len(args) > 2 and args[2] is not None:
                description = "on '{}'".format(args[2])
            else:
                left_key = describe_side('left')
                right_key = describe_side('right')
                if left_key is not None and right_key is not None:
                    description = "on {} == {}".format(left_key, right_key)
                else:
                    description = None  # no explicit key given; the choice of key is left to pandas
            dag_node = DagNode(op_id,
                               BasicCodeLocation(caller_filename, lineno),
                               operator_context,
                               DagNodeDetails(description, list(result.columns)),
                               get_optional_code_info_or_none(optional_code_reference, optional_source_code))
            add_dag_node(dag_node, [input_info_a.dag_node, input_info_b.dag_node], backend_result)

            return result
Example #8
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched DataFrame ``__setitem__`` call and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope. Only string keys (``args[0]``) are handled.

            :raises NotImplementedError: if the key is not a string
            :return: whatever the original ``__setitem__`` returned
            """
            # pylint: disable=too-many-locals
            function_info = FunctionInfo('pandas.core.frame', '__setitem__')
            operator_context = OperatorContext(OperatorType.PROJECTION_MODIFY, function_info)
            input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                        optional_source_code)

            key = args[0]
            if not isinstance(key, str):
                raise NotImplementedError("TODO: Handling __setitem__ for key type {}".format(type(key)))

            # The prepared inputs are deep-copied before the original runs on `self` - presumably
            # because the assignment modifies `self` in place; verify against the backend.
            input_infos = copy.deepcopy(PandasBackend.before_call(operator_context,
                                                                  [input_info.annotated_dfobject]))
            result = original(self, *args, **kwargs)
            # The modified frame itself, not the return value of the original, is the operator output.
            backend_result = PandasBackend.after_call(operator_context, input_infos, self)

            columns = list(self.columns)  # pylint: disable=no-member
            description = "modifies {}".format([key])
            location = BasicCodeLocation(caller_filename, lineno)
            code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
            add_dag_node(DagNode(op_id, location, operator_context, DagNodeDetails(description, columns), code_info),
                         [input_info.dag_node], backend_result)
            assert hasattr(self, "_mlinspect_annotation")
            return result
Example #9
0
def get_expected_check_result_merge():
    """
    Build the expected result for the code snippet in test_no_bias_introduced_for_merge.

    The expectation is a failed NoBiasIntroducedFor(['A']) result: the merge DAG node is mapped
    to a distribution change of column 'A' that is marked as not acceptable.
    """
    join_node = DagNode(
        2,
        BasicCodeLocation('<string-source>', 5),
        OperatorContext(OperatorType.JOIN,
                        FunctionInfo('pandas.core.frame', 'merge')),
        DagNodeDetails("on 'B'", ['A', 'B', 'C']),
        OptionalCodeInfo(CodeReference(5, 12, 5, 36), "df_a.merge(df_b, on='B')"))

    # Relative change per category, spelled out as (ratio_after - ratio_before) / ratio_before
    # so that the floats match the computed ones exactly.
    change_cat_a = (0.5 - 0.4) / 0.4
    change_cat_b = (.25 - 0.4) / 0.4
    change_cat_c = (0.25 - 0.2) / 0.2

    distribution_df = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_b', 'cat_c'],
        'count_before': [2, 2, 1],
        'count_after': [2, 1, 1],
        'ratio_before': [0.4, 0.4, 0.2],
        'ratio_after': [0.5, 0.25, 0.25],
        'relative_ratio_change': [change_cat_a, change_cat_b, change_cat_c],
    })
    # 'cat_b' has the smallest relative change; it is the value reported in the failure message.
    distribution_change = BiasDistributionChange(join_node, False, change_cat_b, distribution_df)

    failure_message = ("A Join causes a min_relative_ratio_change of 'A' by -0.37500000000000006, "
                       "a value below the configured minimum threshold -0.3!")
    return NoBiasIntroducedForResult(
        NoBiasIntroducedFor(['A']),
        CheckStatus.FAILURE,
        failure_message,
        {join_node: {'A': distribution_change}})
Example #10
0
def test_no_missing_embeddings():
    """
    Tests whether NoMissingEmbeddings reports the missing embeddings of a Word2Vec transformer
    """
    test_code = cleandoc("""
            import pandas as pd
            from example_pipelines.healthcare.healthcare_utils import MyW2VTransformer

            df = pd.DataFrame({'A': ['cat_a', 'cat_b', 'cat_a', 'cat_c']})
            word_to_vec = MyW2VTransformer(min_count=2, size=2, workers=1)
            encoded_data = word_to_vec.fit_transform(df)
            """)

    inspector = PipelineInspector.on_pipeline_from_string(test_code)
    inspector = inspector.add_check(NoMissingEmbeddings())
    inspector = inspector.add_custom_monkey_patching_module(custom_monkeypatching)
    inspector_result = inspector.execute()
    check_result = inspector_result.check_to_check_results[NoMissingEmbeddings()]

    # The transformer node is expected to be reported with two values lacking an embedding.
    w2v_node = DagNode(
        1,
        BasicCodeLocation('<string-source>', 5),
        OperatorContext(OperatorType.TRANSFORMER,
                        FunctionInfo('example_pipelines.healthcare.healthcare_utils', 'MyW2VTransformer')),
        DagNodeDetails('Word2Vec: fit_transform', ['array']),
        OptionalCodeInfo(CodeReference(5, 14, 5, 62), 'MyW2VTransformer(min_count=2, size=2, workers=1)'))
    expected_failed_nodes = {w2v_node: MissingEmbeddingsInfo(2, ['cat_b', 'cat_c'])}
    expected_result = NoMissingEmbeddingsResult(NoMissingEmbeddings(10),
                                                CheckStatus.FAILURE,
                                                'Missing embeddings were found!',
                                                expected_failed_nodes)
    compare(check_result, expected_result)
def get_expected_check_result_dropna():
    """
    Build the expected result for the code snippet in test_no_bias_introduced_for_dropna.

    The expectation is a failed SimilarRemovalProbabilitiesFor(['A']) result: the dropna DAG node
    is mapped to removal probabilities of column 'A' that are marked as not acceptable.
    """
    dropna_node = DagNode(
        1,
        BasicCodeLocation('<string-source>', 5),
        OperatorContext(OperatorType.SELECTION,
                        FunctionInfo('pandas.core.frame', 'dropna')),
        DagNodeDetails("dropna", ['A', 'B']),
        OptionalCodeInfo(CodeReference(5, 5, 5, 16), "df.dropna()"))

    # One row per value of the sensitive column; all 'cat_a' rows and a third of 'cat_c' are removed.
    removal_stats = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_c'],
        'count_before': [2, 3],
        'count_after': [0, 2],
        'removed_records': [2, 1],
        'removal_probability': [1., 1. / 3.],
        'normalized_removal_probability': [3., 1.],
    })
    probabilities = RemovalProbabilities(dropna_node, False, 3., removal_stats)

    failure_message = ("A Selection causes a max_probability_difference of 'A' by 3.0, "
                       "a value above the configured maximum threshold 2.0!")
    return SimilarRemovalProbabilitiesForResult(
        SimilarRemovalProbabilitiesFor(['A']),
        CheckStatus.FAILURE,
        failure_message,
        {dropna_node: {'A': probabilities}})
Example #12
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched DataFrame ``__getitem__`` call and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope. The key ``args[0]`` decides the kind of operator:
            a string or a list starting with a string is a projection, a pandas Series is a selection.

            :raises NotImplementedError: for every other kind of key
            :return: the result data of the backend result produced for the call
            """
            function_info = FunctionInfo('pandas.core.frame', '__getitem__')
            input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                        optional_source_code)

            # Only the operator type, the columns and the description depend on the key;
            # the DAG node is assembled once below.
            key = args[0]
            if isinstance(key, str):  # Projection to Series
                operator_type = OperatorType.PROJECTION
                columns = [key]
                description = "to {}".format(columns)
            elif isinstance(key, list) and isinstance(key[0], str):  # Projection to DF
                operator_type = OperatorType.PROJECTION
                columns = key
                description = "to {}".format(columns)
            elif isinstance(key, pandas.Series):  # Selection
                operator_type = OperatorType.SELECTION
                columns = list(self.columns)  # pylint: disable=no-member
                description = "Select by Series"
                if optional_source_code:
                    description = "Select by Series: {}".format(optional_source_code)
            else:
                raise NotImplementedError()

            operator_context = OperatorContext(operator_type, function_info)
            location = BasicCodeLocation(caller_filename, lineno)
            code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
            dag_node = DagNode(op_id, location, operator_context, DagNodeDetails(description, columns), code_info)

            prepared_inputs = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
            raw_result = original(prepared_inputs[0].result_data, *args, **kwargs)
            backend_result = PandasBackend.after_call(operator_context, prepared_inputs, raw_result)
            annotated_result = backend_result.annotated_dfobject.result_data
            add_dag_node(dag_node, [input_info.dag_node], backend_result)

            return annotated_result
Example #13
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched DataFrame constructor and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope. The frame is a data source, so the node has no parents. Nothing is
            returned; the constructed frame is ``self``.
            """
            function_info = FunctionInfo('pandas.core.frame', 'DataFrame')
            operator_context = OperatorContext(OperatorType.DATA_SOURCE, function_info)

            no_inputs = PandasBackend.before_call(operator_context, [])
            # The original initialises `self`, so `self` is the operator output.
            original(self, *args, **kwargs)
            backend_result = PandasBackend.after_call(operator_context, no_inputs, self)

            location = BasicCodeLocation(caller_filename, lineno)
            details = DagNodeDetails(None, list(self.columns))  # pylint: disable=no-member
            code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
            add_dag_node(DagNode(op_id, location, operator_context, details, code_info), [], backend_result)
Example #14
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched ``read_csv`` call and register its DAG node.

            ``args``, ``kwargs`` and ``original`` are free variables supplied by the enclosing
            scope. The file is a data source, so the node has no parents.

            The node description is the file name of the source, which is taken from the first
            positional argument or from the ``filepath_or_buffer`` keyword. It is ``None`` when
            the source is not a path (e.g. an open buffer).

            :param op_id: id given to the new DAG node
            :param caller_filename: file of the call that is being inspected
            :param lineno: line of the call that is being inspected
            :param optional_code_reference: code reference of the call, forwarded as is
            :param optional_source_code: source code of the call, forwarded as is
            :return: the frame returned by the original ``read_csv``
            """
            function_info = FunctionInfo('pandas.io.parsers', 'read_csv')

            operator_context = OperatorContext(OperatorType.DATA_SOURCE, function_info)
            input_infos = PandasBackend.before_call(operator_context, [])
            result = original(*args, **kwargs)
            backend_result = PandasBackend.after_call(operator_context,
                                                      input_infos,
                                                      result)

            # The source may be passed by keyword and may be a path object or a buffer, so it
            # cannot be assumed to be a positional string.
            source = args[0] if len(args) > 0 else kwargs.get('filepath_or_buffer')
            if isinstance(source, (str, os.PathLike)):
                # basename splits on every separator the platform accepts, not only os.path.sep
                description = "{}".format(os.path.basename(os.fsdecode(source)))
            else:
                description = None
            dag_node = DagNode(op_id,
                               BasicCodeLocation(caller_filename, lineno),
                               operator_context,
                               DagNodeDetails(description, list(result.columns)),
                               get_optional_code_info_or_none(optional_code_reference, optional_source_code))
            add_dag_node(dag_node, [], backend_result)
            return result
Example #15
0
        def execute_inspections(op_id, caller_filename, lineno,
                                optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched ``numpy.random`` ``random`` call and register its DAG node.

            ``args``, ``kwargs`` and ``original`` are free variables supplied by the enclosing
            scope. The generated data is a data source, so the node has no parents.

            :return: the result data of the backend result produced for the call
            """
            function_info = FunctionInfo('numpy.random', 'random')
            operator_context = OperatorContext(OperatorType.DATA_SOURCE, function_info)

            no_inputs = SklearnBackend.before_call(operator_context, [])
            generated = original(*args, **kwargs)
            backend_result = SklearnBackend.after_call(operator_context, no_inputs, generated)

            location = BasicCodeLocation(caller_filename, lineno)
            details = DagNodeDetails("random", ['array'])
            code_info = get_optional_code_info_or_none(optional_code_reference,
                                                       optional_source_code)
            add_dag_node(DagNode(op_id, location, operator_context, details, code_info),
                         [], backend_result)
            return backend_result.annotated_dfobject.result_data
        def execute_inspections(op_id, caller_filename, lineno,
                                optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched ``get_rdataset`` call and register its DAG node.

            ``args``, ``kwargs`` and ``original`` are free variables supplied by the enclosing
            scope. The dataset is a data source, so the node has no parents.

            :return: the dataset object of the original call, with its ``data`` attribute replaced
                     by the result data of the backend result
            """
            function_info = FunctionInfo('statsmodels.datasets',
                                         'get_rdataset')
            operator_context = OperatorContext(OperatorType.DATA_SOURCE,
                                               function_info)

            no_inputs = PandasBackend.before_call(operator_context, [])
            dataset = original(*args, **kwargs)
            # Only the data of the returned dataset object is passed through the backend.
            backend_result = PandasBackend.after_call(operator_context,
                                                      no_inputs, dataset.data)
            dataset.data = backend_result.annotated_dfobject.result_data

            location = BasicCodeLocation(caller_filename, lineno)
            details = DagNodeDetails(dataset.title, list(dataset.data.columns))
            code_info = get_optional_code_info_or_none(optional_code_reference,
                                                       optional_source_code)
            add_dag_node(DagNode(op_id, location, operator_context, details, code_info),
                         [], backend_result)
            return dataset
Example #17
0
    def patched__getitem__(self, *args, **kwargs):
        """ Patch for ('pandas.core.indexing', '_LocIndexer') __getitem__

        The call is only inspected while ``call_info_singleton.column_transformer_active`` is set;
        otherwise it is passed straight to the original. An inspected call must be of the form
        ``[<slice without start and stop>, [<str>, ...]]``, i.e. a projection to a list of columns.

        :raises NotImplementedError: if an inspected call uses any other kind of key
        :return: the result of the original call, or the result data of the backend result when
                 the call was inspected
        """
        original = gorilla.get_original_attribute(
            pandas.core.indexing._LocIndexer, '__getitem__')  # pylint: disable=protected-access

        if not call_info_singleton.column_transformer_active:
            return original(self, *args, **kwargs)

        # Call information is taken from the transformer that triggered this access.
        op_id = singleton.get_next_op_id()
        caller_filename = call_info_singleton.transformer_filename
        lineno = call_info_singleton.transformer_lineno
        function_info = call_info_singleton.transformer_function_info
        optional_code_reference = call_info_singleton.transformer_optional_code_reference
        optional_source_code = call_info_singleton.transformer_optional_source_code

        key = args[0]
        is_column_projection = (isinstance(key, tuple)
                                and not key[0].start and not key[0].stop
                                and isinstance(key[1], list) and isinstance(key[1][0], str))
        if not is_column_projection:
            raise NotImplementedError()
        # Projection to one or multiple columns, return value is df
        columns = key[1]

        operator_context = OperatorContext(OperatorType.PROJECTION, function_info)
        input_info = get_input_info(self.obj, caller_filename,  # pylint: disable=no-member
                                    lineno, function_info, optional_code_reference, optional_source_code)
        prepared_inputs = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
        raw_result = original(self, *args, **kwargs)
        backend_result = PandasBackend.after_call(operator_context, prepared_inputs, raw_result)
        annotated_result = backend_result.annotated_dfobject.result_data

        location = BasicCodeLocation(caller_filename, lineno)
        details = DagNodeDetails("to {}".format(columns), columns)
        code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
        add_dag_node(DagNode(op_id, location, operator_context, details, code_info),
                     [input_info.dag_node], backend_result)

        return annotated_result
    def patched_fit(self, *args, **kwargs):
        """ Patch for ('statsmodel.api.OLS', 'fit')

        Registers DAG nodes for the train data (``self.data.exog``) and the train labels
        (``self.data.endog``), replaces both attributes with the values returned for them,
        runs the original ``fit`` and registers an estimator node that has the two nodes as parents.

        :return: the result of the original ``fit``
        """
        # pylint: disable=no-method-argument, too-many-locals
        original = gorilla.get_original_attribute(api.OLS, 'fit')
        function_info = FunctionInfo('statsmodel.api.OLS', 'fit')

        # Train data
        # pylint: disable=no-member
        exog_backend_result, exog_node, exog_data = add_train_data_node(
            self, self.data.exog, function_info)
        self.data.exog = exog_data
        # Train labels
        # pylint: disable=no-member
        endog_backend_result, endog_node, endog_data = add_train_label_node(
            self, self.data.endog, function_info)
        self.data.endog = endog_data

        # Estimator
        operator_context = OperatorContext(OperatorType.ESTIMATOR, function_info)
        estimator_inputs = SklearnBackend.before_call(
            operator_context,
            [exog_backend_result.annotated_dfobject, endog_backend_result.annotated_dfobject])
        fit_result = original(self, *args, **kwargs)
        # No data is handed to the backend for the estimator itself.
        estimator_backend_result = SklearnBackend.after_call(operator_context, estimator_inputs, None)

        location = BasicCodeLocation(self.mlinspect_caller_filename, self.mlinspect_lineno)
        # NOTE(review): the label "Decision Tree" on an OLS node looks like a copy-paste slip;
        # it is kept unchanged here because expected DAGs may compare against it - confirm.
        details = DagNodeDetails("Decision Tree", [])
        code_info = get_optional_code_info_or_none(self.mlinspect_optional_code_reference,
                                                   self.mlinspect_optional_source_code)
        dag_node = DagNode(singleton.get_next_op_id(), location, operator_context, details, code_info)
        add_dag_node(dag_node, [exog_node, endog_node], estimator_backend_result)
        return fit_result
Example #19
0
        def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
            """
            Run the inspections around the patched DataFrame ``dropna`` call and register its DAG node.

            ``self``, ``args``, ``kwargs`` and ``original`` are free variables supplied by the
            enclosing scope.

            :param op_id: id given to the new DAG node
            :param caller_filename: file of the call that is being inspected
            :param lineno: line of the call that is being inspected
            :param optional_code_reference: code reference of the call, forwarded as is
            :param optional_source_code: source code of the call, forwarded as is
            :raises NotImplementedError: if the call drops the rows in place
            :return: the result data of the backend result produced for the call
            """
            function_info = FunctionInfo('pandas.core.frame', 'dropna')

            # Reject a keyword inplace request before anything runs, so that the caller's frame
            # is not modified by a call that then fails.
            if kwargs.get('inplace', False):
                raise NotImplementedError("TODO: Support inplace dropna")

            input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                        optional_source_code)
            operator_context = OperatorContext(OperatorType.SELECTION, function_info)
            input_infos = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
            # No input_infos copy needed because it's only a selection and the rows not being removed don't change
            # `self` is not part of args, so all positional arguments have to be forwarded.
            result = original(input_infos[0].result_data, *args, **kwargs)
            if result is None:
                # Reached when inplace was requested positionally.
                raise NotImplementedError("TODO: Support inplace dropna")
            backend_result = PandasBackend.after_call(operator_context,
                                                      input_infos,
                                                      result)
            result = backend_result.annotated_dfobject.result_data
            dag_node = DagNode(op_id,
                               BasicCodeLocation(caller_filename, lineno),
                               operator_context,
                               DagNodeDetails("dropna", list(result.columns)),
                               get_optional_code_info_or_none(optional_code_reference, optional_source_code))
            add_dag_node(dag_node, [input_info.dag_node], backend_result)

            return result