def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Run the wrapped groupby ``agg`` call between the backend hooks and add its DAG node.

    Raises ``NotImplementedError`` when the grouped object has no
    ``_mlinspect_dag_node`` attribute. Returns the result data taken from the
    backend result.
    """
    function_info = FunctionInfo('pandas.core.groupby.generic', 'agg')

    if not hasattr(self, '_mlinspect_dag_node'):
        raise NotImplementedError("TODO: Support agg if groupby happened in external code")
    parent_dag_node = get_dag_node_for_id(self._mlinspect_dag_node)  # pylint: disable=no-member

    operator_context = OperatorContext(OperatorType.GROUP_BY_AGG, function_info)
    input_infos = PandasBackend.before_call(operator_context, [])
    aggregated = original(self, *args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, aggregated)

    # Positional aggregation arguments, when present, are shown instead of the keyword ones
    aggregate_spec = args if args else kwargs
    group_key = aggregated.index.name
    description = "Groupby '{}', Aggregate: '{}'".format(group_key, aggregate_spec)
    columns = [aggregated.index.name, *aggregated.columns]

    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails(description, columns),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))
    add_dag_node(dag_node, [parent_dag_node], backend_result)

    return backend_result.annotated_dfobject.result_data
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Execute inspections for ``DataFrame.merge`` and add the JOIN node to the DAG.

    The right-hand frame is accepted either as the first positional argument or
    as the ``right`` keyword. The node description is derived from the ``on``
    keyword, or from ``left_on``/``right_on``/``left_index``/``right_index``
    when ``on`` is absent; if none of these describe the join keys the
    description is ``None``.

    Returns the result data taken from the backend result.
    Raises ``TypeError`` when no right-hand frame was supplied.
    """
    function_info = FunctionInfo('pandas.core.frame', 'merge')

    # Locate the right-hand frame without rebinding the enclosing args/kwargs
    if 'right' in kwargs:
        right_df = kwargs['right']
        remaining_args = args
        remaining_kwargs = {key: value for key, value in kwargs.items() if key != 'right'}
    elif args:
        right_df = args[0]
        remaining_args = args[1:]
        remaining_kwargs = kwargs
    else:
        raise TypeError("merge() missing 1 required positional argument: 'right'")

    input_info_a = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                  optional_source_code)
    input_info_b = get_input_info(right_df, caller_filename, lineno, function_info, optional_code_reference,
                                  optional_source_code)
    operator_context = OperatorContext(OperatorType.JOIN, function_info)
    input_infos = PandasBackend.before_call(operator_context,
                                            [input_info_a.annotated_dfobject, input_info_b.annotated_dfobject])
    # The inputs are handed to merge without copying
    # NOTE(review): assumes merge leaves both input frames unmodified - confirm
    result = original(input_infos[0].result_data, input_infos[1].result_data, *remaining_args,
                      **remaining_kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, result)
    result = backend_result.annotated_dfobject.result_data

    # Build the description without assuming that ``on`` was passed as a keyword
    has_left_key = 'left_on' in kwargs or bool(kwargs.get('left_index'))
    has_right_key = 'right_on' in kwargs or bool(kwargs.get('right_index'))
    if 'on' in kwargs:
        description = "on '{}'".format(kwargs['on'])
    elif has_left_key and has_right_key:
        left_key = "'{}'".format(kwargs['left_on']) if 'left_on' in kwargs else 'left_index'
        right_key = "'{}'".format(kwargs['right_on']) if 'right_on' in kwargs else 'right_index'
        description = "on {} == {}".format(left_key, right_key)
    else:
        description = None

    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails(description, list(result.columns)),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))
    add_dag_node(dag_node, [input_info_a.dag_node, input_info_b.dag_node], backend_result)

    return result
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Execute inspections for ``DataFrame.replace`` and add a PROJECTION_MODIFY node to the DAG.

    ``to_replace`` and ``value`` are read from the positional arguments when
    present and from the keywords of the same name otherwise; they are only
    used for validation and for the node description.

    Returns the result data taken from the backend result.
    Raises ``NotImplementedError`` for dict-based replacements, before the
    replace call or any backend hook is executed.
    """
    function_info = FunctionInfo('pandas.core.frame', 'replace')

    to_replace = args[0] if len(args) > 0 else kwargs.get('to_replace')
    value = args[1] if len(args) > 1 else kwargs.get('value')
    # Reject unsupported input up front so nothing has run when the error is raised
    if isinstance(to_replace, dict):
        raise NotImplementedError("TODO: Add support for replace with dicts")

    input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                optional_source_code)
    operator_context = OperatorContext(OperatorType.PROJECTION_MODIFY, function_info)
    input_infos = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
    # The input is handed to replace without copying
    # NOTE(review): an inplace replace would modify it and return None - confirm this is never used
    result = original(input_infos[0].result_data, *args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, result)
    result = backend_result.annotated_dfobject.result_data

    description = "Replace '{}' with '{}'".format(to_replace, value)
    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails(description, list(result.columns)),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))
    add_dag_node(dag_node, [input_info.dag_node], backend_result)

    return result
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Run the wrapped ``DataFrame.__setitem__`` between the backend hooks and add its DAG node.

    Only string keys are handled; any other key type raises ``NotImplementedError``.
    Returns whatever the original ``__setitem__`` call returned.
    """
    # pylint: disable=too-many-locals
    function_info = FunctionInfo('pandas.core.frame', '__setitem__')
    operator_context = OperatorContext(OperatorType.PROJECTION_MODIFY, function_info)
    input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                optional_source_code)

    key = args[0]
    if not isinstance(key, str):
        raise NotImplementedError("TODO: Handling __setitem__ for key type {}".format(type(key)))

    # Deep-copy what before_call returned before the original call modifies self
    input_infos = copy.deepcopy(PandasBackend.before_call(operator_context, [input_info.annotated_dfobject]))
    outcome = original(self, *args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, self)
    column_names = list(self.columns)  # pylint: disable=no-member
    description = "modifies {}".format([key])

    location = BasicCodeLocation(caller_filename, lineno)
    details = DagNodeDetails(description, column_names)
    code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
    dag_node = DagNode(op_id, location, operator_context, details, code_info)
    add_dag_node(dag_node, [input_info.dag_node], backend_result)

    assert hasattr(self, "_mlinspect_annotation")
    return outcome
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Run the wrapped ``DataFrame`` constructor between the backend hooks and add a DATA_SOURCE DAG node.

    The constructor initialises ``self`` in place, so ``self`` is what gets
    passed to the backend as the result. Nothing is returned.
    """
    function_info = FunctionInfo('pandas.core.frame', 'DataFrame')
    operator_context = OperatorContext(OperatorType.DATA_SOURCE, function_info)

    input_infos = PandasBackend.before_call(operator_context, [])
    original(self, *args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, self)

    column_names = list(self.columns)  # pylint: disable=no-member
    location = BasicCodeLocation(caller_filename, lineno)
    details = DagNodeDetails(None, column_names)  # a data source node carries no description
    code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
    add_dag_node(DagNode(op_id, location, operator_context, details, code_info), [], backend_result)
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Execute inspections for ``DataFrame.__getitem__`` and add the matching DAG node.

    Supported keys:
      * ``str``: projection to a single column
      * ``list`` whose first element is a ``str``, or an empty list: projection to those columns
      * ``pandas.Series``: selection; all columns of ``self`` are kept

    Returns the result data taken from the backend result.
    Raises ``NotImplementedError`` for any other key type.
    """
    function_info = FunctionInfo('pandas.core.frame', '__getitem__')
    input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                optional_source_code)

    key = args[0]
    if isinstance(key, str):  # Projection to Series
        operator_type = OperatorType.PROJECTION
        columns = [key]
        description = "to {}".format(columns)
    elif isinstance(key, list) and (not key or isinstance(key[0], str)):  # Projection to DF
        # An empty list is checked first so that key[0] cannot raise IndexError
        operator_type = OperatorType.PROJECTION
        columns = list(key)  # copy, so later changes to the caller's list do not alter the DAG node
        description = "to {}".format(columns)
    elif isinstance(key, pandas.Series):  # Selection
        operator_type = OperatorType.SELECTION
        columns = list(self.columns)  # pylint: disable=no-member
        if optional_source_code:
            description = "Select by Series: {}".format(optional_source_code)
        else:
            description = "Select by Series"
    else:
        raise NotImplementedError("TODO: Handling __getitem__ for key type {}".format(type(key)))

    operator_context = OperatorContext(operator_type, function_info)
    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails(description, columns),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))

    input_infos = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
    result = original(input_infos[0].result_data, *args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, result)
    result = backend_result.annotated_dfobject.result_data
    add_dag_node(dag_node, [input_info.dag_node], backend_result)

    return result
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Execute inspections for ``read_csv`` and add a DATA_SOURCE node to the DAG.

    The node description is the last ``os.path.sep``-separated component of the
    source path. The source is read from the first positional argument or,
    if there is none, from the ``filepath_or_buffer`` keyword. ``os.PathLike``
    objects are converted with ``os.fspath``; for anything that is not a
    string afterwards (e.g. a file-like object) the type name is used instead.

    Returns the result of the original ``read_csv`` call.
    """
    function_info = FunctionInfo('pandas.io.parsers', 'read_csv')
    operator_context = OperatorContext(OperatorType.DATA_SOURCE, function_info)
    input_infos = PandasBackend.before_call(operator_context, [])
    result = original(*args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, result)

    # Derive the description defensively: the file has already been read at this point,
    # so an unusual source type must not make the inspection fail
    source = args[0] if args else kwargs.get('filepath_or_buffer')
    if isinstance(source, os.PathLike):
        source = os.fspath(source)
    if isinstance(source, str):
        description = "{}".format(source.split(os.path.sep)[-1])
    else:
        description = type(source).__name__

    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails(description, list(result.columns)),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))
    add_dag_node(dag_node, [], backend_result)
    return result
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Run the wrapped ``get_rdataset`` call between the backend hooks and add a DATA_SOURCE DAG node.

    The ``data`` attribute of the returned dataset object is replaced by the
    result data from the backend result; the dataset object itself is returned.
    """
    function_info = FunctionInfo('statsmodels.datasets', 'get_rdataset')
    operator_context = OperatorContext(OperatorType.DATA_SOURCE, function_info)

    input_infos = PandasBackend.before_call(operator_context, [])
    dataset = original(*args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, dataset.data)
    dataset.data = backend_result.annotated_dfobject.result_data

    location = BasicCodeLocation(caller_filename, lineno)
    # The dataset title serves as the node description
    details = DagNodeDetails(dataset.title, list(dataset.data.columns))
    code_info = get_optional_code_info_or_none(optional_code_reference, optional_source_code)
    add_dag_node(DagNode(op_id, location, operator_context, details, code_info), [], backend_result)

    return dataset
def patched__getitem__(self, *args, **kwargs):
    """
    Patch for ('pandas.core.indexing', '_LocIndexer'), method '__getitem__'.

    When no column transformer is active, the call is forwarded to the original
    ``__getitem__`` unchanged. Otherwise only ``.loc[:, [<column names>]]`` is
    supported: the row indexer must be the full slice ``:`` and the column
    indexer a list that is empty or starts with a string. Such a call is
    recorded as a PROJECTION node, using the code location stored in
    ``call_info_singleton``.

    Returns the result data from the backend result, or the plain result of the
    original call when no column transformer is active.
    Raises ``NotImplementedError`` for every other key while a column
    transformer is active.
    """
    original = gorilla.get_original_attribute(
        pandas.core.indexing._LocIndexer, '__getitem__')  # pylint: disable=protected-access

    if not call_info_singleton.column_transformer_active:
        return original(self, *args, **kwargs)

    op_id = singleton.get_next_op_id()
    caller_filename = call_info_singleton.transformer_filename
    lineno = call_info_singleton.transformer_lineno
    function_info = call_info_singleton.transformer_function_info
    optional_code_reference = call_info_singleton.transformer_optional_code_reference
    optional_source_code = call_info_singleton.transformer_optional_source_code

    key = args[0]
    # Compare against the full slice explicitly: a truthiness test on start/stop would
    # also accept e.g. `.loc[:0, cols]`, which restricts rows and is not a pure projection.
    # The isinstance checks keep non-slice row indexers and empty column lists from
    # raising AttributeError/IndexError here.
    is_column_projection = (isinstance(key, tuple) and len(key) == 2
                            and isinstance(key[0], slice) and key[0] == slice(None)
                            and isinstance(key[1], list)
                            and (not key[1] or isinstance(key[1][0], str)))
    if not is_column_projection:
        raise NotImplementedError("TODO: Handling loc.__getitem__ for key {!r}".format(key))
    # Projection to one or multiple columns, return value is df
    columns = list(key[1])

    operator_context = OperatorContext(OperatorType.PROJECTION, function_info)
    input_info = get_input_info(self.obj, caller_filename,  # pylint: disable=no-member
                                lineno, function_info, optional_code_reference, optional_source_code)
    input_infos = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
    result = original(self, *args, **kwargs)
    backend_result = PandasBackend.after_call(operator_context, input_infos, result)
    result = backend_result.annotated_dfobject.result_data

    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails("to {}".format(columns), columns),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))
    add_dag_node(dag_node, [input_info.dag_node], backend_result)
    return result
def execute_inspections(op_id, caller_filename, lineno, optional_code_reference, optional_source_code):
    """
    Execute inspections for ``DataFrame.dropna`` and add a SELECTION node to the DAG.

    All positional and keyword arguments are forwarded unchanged to the original
    ``dropna``; ``self`` is not part of ``args``, so no positional argument may
    be skipped.

    Returns the result data taken from the backend result.
    Raises ``NotImplementedError`` for inplace calls: before anything runs when
    ``inplace`` is passed as a truthy keyword, and otherwise when the original
    call returns ``None``.
    """
    function_info = FunctionInfo('pandas.core.frame', 'dropna')

    # Fail before the frame is modified when the inplace request is visible in the keywords
    if kwargs.get('inplace', False):
        raise NotImplementedError("TODO: Support inplace dropna")

    input_info = get_input_info(self, caller_filename, lineno, function_info, optional_code_reference,
                                optional_source_code)
    operator_context = OperatorContext(OperatorType.SELECTION, function_info)
    input_infos = PandasBackend.before_call(operator_context, [input_info.annotated_dfobject])
    # No input_infos copy needed because it's only a selection and the rows not being removed don't change
    result = original(input_infos[0].result_data, *args, **kwargs)
    if result is None:
        # Covers an inplace flag that was not passed as a keyword
        raise NotImplementedError("TODO: Support inplace dropna")
    backend_result = PandasBackend.after_call(operator_context, input_infos, result)
    result = backend_result.annotated_dfobject.result_data

    dag_node = DagNode(op_id,
                       BasicCodeLocation(caller_filename, lineno),
                       operator_context,
                       DagNodeDetails("dropna", list(result.columns)),
                       get_optional_code_info_or_none(optional_code_reference, optional_source_code))
    add_dag_node(dag_node, [input_info.dag_node], backend_result)

    return result