def restrict_column(user, column: models.Column):
    """Restrict a column to the set of values it currently holds.

    Loads the table backing the column's workflow, collects the distinct
    non-null values stored under the column name and installs them as the
    column categories, so that only those values are allowed from now on.

    :param user: User executing this action (passed to the log entry)
    :param column: Column object to restrict
    :return: Nothing. The column and its workflow are modified.
    :raises errors.OnTaskColumnCategoryValueError: when the column contains
        no non-null value.
    """
    workflow = column.workflow

    # Distinct non-null values currently stored in the column
    table = pandas.load_table(workflow.get_data_frame_table_name())
    observed_values = set(table[column.name].dropna())
    if len(observed_values) == 0:
        raise errors.OnTaskColumnCategoryValueError(
            message=_('The column has no meaningful values'))

    # The observed values become the only allowed categories
    column.set_categories(list(observed_values))

    # Record the event, then re-evaluate the operands in the workflow
    column.log(user, models.Log.COLUMN_RESTRICT)
    workflow.set_query_builder_ops()
    workflow.save()
def add_column_to_workflow(
    user,
    workflow: models.Workflow,
    column: models.Column,
    column_initial_value: Any,
    action_column_event: Optional[str] = None,
    action: Optional[models.Action] = None,
):
    """Add a new column to the workflow.

    :param user: User executing the operation
    :param workflow: Used workflow
    :param column: Column to insert
    :param column_initial_value: Initial value for the column
    :param action_column_event: Event to record if the action/column needs
        to be stored
    :param action: If the column is a question, this is the action in which
        it is being used.
    :return: Nothing. Column added to Wflow and DB
    :raises services.OnTaskWorkflowAddColumn: if the column cannot be added
        to the DB table. The exception carries the column in ``to_delete``
        and is chained to the original error.
    """
    # Catch the special case of integer type and no initial value. Pandas
    # encodes it as NaN but a cycle through the database transforms it into
    # a string. To avoid this case, integer + empty value => double
    if column.data_type == 'integer' and column_initial_value is None:
        column.data_type = 'double'

    # Fill in the remaining fields in the column
    column.workflow = workflow
    column.is_key = False

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)

    # Save column, refresh workflow, and increase number of columns
    column.save()
    workflow.refresh_from_db()
    workflow.ncols += 1
    workflow.set_query_builder_ops()
    workflow.save()

    # Add the new column to the DB
    try:
        sql.add_column_to_db(
            workflow.get_data_frame_table_name(),
            column.name,
            column.data_type,
            initial=column_initial_value)
    except Exception as exc:
        # Chain explicitly so the underlying DB error stays attached as the
        # cause of the service-level exception.
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add element: {0}').format(str(exc)),
            to_delete=[column]) from exc

    if action_column_event:
        # Make sure the action/column pair exists before logging the event
        acc, __ = models.ActionColumnConditionTuple.objects.get_or_create(
            action=action,
            column=column,
            condition=None)
        acc.log(user, action_column_event)

    column.log(user, models.Log.COLUMN_ADD)
# NOTE(review): a second ``add_random_column`` is defined further down in
# this file; if both live at module level the later one replaces this one.
def add_random_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    data_frame: pd.DataFrame,
):
    """Add a random column to the workflow.

    The row indexes are split into as many partitions as the column has
    categories (through ``_partition``) and every row receives the category
    of the partition it falls in.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param data_frame: Data frame of the current workflow
    :return: Column is added to the workflow
    :raises services.OnTaskWorkflowStoreError: if the data frame cannot be
        stored. The exception carries the column in ``to_delete`` and is
        chained to the original error.
    """
    # Empty new column
    new_column = [None] * workflow.nrows
    categories = column.get_categories()

    # Create the random partitions
    partitions = _partition(list(range(workflow.nrows)), len(categories))

    # Assign values to partitions
    for idx, indexes in enumerate(partitions):
        for col_idx in indexes:
            new_column[col_idx] = categories[idx]

    # Assign the new column to the data frame
    data_frame[column.name] = pd.Series(
        new_column,
        dtype=_ontask_type_to_pd_type[column.data_type],
        index=data_frame.index)
    if column.data_type == 'datetime':
        # Attach the configured time zone to the freshly created values
        data_frame[column.name] = data_frame[column.name].dt.tz_localize(
            settings.TIME_ZONE)

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        # Chain explicitly so the storage error stays attached as the cause
        raise services.OnTaskWorkflowStoreError(
            message=_('Unable to add the column: {0}').format(str(exc)),
            to_delete=[column]) from exc

    workflow.ncols = workflow.columns.count()
    workflow.save()

    # Log the event
    column.log(user, models.Log.COLUMN_ADD_RANDOM)
def update_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    old_name: str,
    old_position: int,
    action_column_item: Optional[models.Action] = None,
    action_column_event: Optional[str] = None,
):
    """Propagate the changes made to a column.

    :param user: User performing the operation
    :param workflow: Workflow being processed
    :param column: Column being modified (already carrying the new values)
    :param old_name: Previous name, used to detect renaming
    :param old_position: Previous position, used to detect repositioning
    :param action_column_item: Action/column item in case the action is
        stored in that table.
    :param action_column_event: Event to record the operation on the
        action/column
    :return: Nothing. Side effect in the workflow.
    """
    renamed = old_name != column.name
    if renamed:
        # Rename both the DB table column and the data frame column
        table_name = workflow.get_data_frame_table_name()
        sql.db_rename_column(table_name, old_name, column.name)
        pandas.rename_df_column(workflow, old_name, column.name)

    moved = old_position != column.position
    if moved:
        # Shift the columns affected by the move
        workflow.reposition_columns(old_position, column.position)

    column.save()

    # The prefetched columns are stale at this point: fetch the workflow
    # again from the DB
    workflow_query = models.Workflow.objects.prefetch_related('columns')
    workflow = workflow_query.get(
        id=workflow.id,
    )

    # A column change requires rebuilding the query_builder_ops before
    # saving the workflow
    workflow.set_query_builder_ops()
    workflow.save()

    if action_column_item:
        action_column_item.log(user, action_column_event)

    # Log the event
    column.log(user, models.Log.COLUMN_EDIT)
def add_formula_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    operation: str,
    selected_columns: List[models.Column],
):
    """Add the formula column to the workflow.

    The new column is computed by applying the function registered in
    ``_op_distrib`` under ``operation`` to the selected columns of the
    workflow table.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param operation: string denoting the operation
    :param selected_columns: List of columns selected for the operation.
    :return: Column is added to the workflow
    :raises services.OnTaskWorkflowAddColumn: if the data frame cannot be
        stored. The exception carries the column in ``to_delete`` and is
        chained to the original error.
    """
    # Save the column object attached to the form and add additional fields
    column.workflow = workflow
    column.is_key = False

    # Save the instance
    column.save()

    # Update the data frame
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Add the column with the appropriate computation
    cnames = [col.name for col in selected_columns]
    df[column.name] = _op_distrib[operation](df[cnames])

    # Populate the column type from the dtype of the computed values.
    # NOTE(review): ``.get`` is used, so presumably a dtype without a mapping
    # leaves data_type as None -- confirm this is acceptable.
    column.data_type = pandas.datatype_names.get(
        df[column.name].dtype.name)

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)

    # Second save: persists the data type computed above
    column.save()
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(df, workflow)
    except Exception as exc:
        # Chain explicitly so the storage error stays attached as the cause
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add column: {0}').format(str(exc)),
            to_delete=[column]) from exc

    workflow.ncols = workflow.columns.count()
    workflow.save()

    column.log(user, models.Log.COLUMN_ADD_FORMULA)
def delete_column(
    user: get_user_model(),
    workflow: models.Workflow,
    column: models.Column,
    cond_to_delete: Optional[List[models.Condition]] = None,
):
    """Remove column from ontask.workflow.

    Given a workflow and a column, removes it from the workflow (and the
    corresponding data frame). Conditions that use the column are deleted,
    the actions that lose their filter are re-evaluated, and views left
    without columns are deleted.

    :param user: User performing the operation
    :param workflow: Workflow object
    :param column: Column object to delete
    :param cond_to_delete: List of conditions to delete after removing the
        column. When empty or not given, the conditions of the workflow
        that use the column are looked up.
    :return: Nothing. Effect reflected in the database
    """
    if not cond_to_delete:
        # The conditions to delete are not given, so calculate them.
        # This has to happen BEFORE the column is deleted: once the column
        # row is gone its relations to the conditions are gone as well and
        # the membership test below could not match anything.
        cond_to_delete = [
            cond
            for cond in models.Condition.objects.filter(
                action__workflow=workflow)
            if column in cond.columns.all()
        ]

    column.log(user, models.Log.COLUMN_DELETE)

    # Drop the column from the DB table storing the data frame
    sql.df_drop_column(workflow.get_data_frame_table_name(), column.name)

    # Reposition the columns above the one being deleted
    workflow.reposition_columns(column.position, workflow.ncols + 1)

    # Delete the column
    column.delete()

    # Update the information in the workflow
    workflow.ncols = workflow.ncols - 1
    workflow.save()

    # If a column disappears, the conditions that contain that variable
    # are removed
    actions_without_filters = []
    for condition in cond_to_delete:
        if condition.is_filter:
            # Remember the action before its filter is deleted
            actions_without_filters.append(condition.action)

        # Formula has the name of the deleted column. Delete it
        condition.delete()

    # Traverse the actions for which the filter has been deleted and reassess
    # all their conditions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in actions_without_filters:
        act.update_n_rows_selected()

    # If a column disappears, the views that contain only that column need to
    # disappear as well as they are no longer relevant.
    for view in workflow.views.all():
        if view.columns.count() == 0:
            view.delete()
# NOTE(review): ``add_random_column`` is also defined earlier in this file;
# if both live at module level this later definition is the one that wins.
def add_random_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    data_frame: pd.DataFrame,
):
    """Add a random column to the workflow.

    The row indexes are split into as many partitions as the column has
    categories (through ``_partition``) and every row receives the category
    of the partition it falls in. If the first category can be read as an
    integer N, the categories are first replaced by 1..N.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param data_frame: Data frame of the current workflow
    :return: Column is added to the workflow
    :raises services.OnTaskWorkflowIntegerLowerThanOne: if the first category
        is an integer lower than or equal to 1.
    :raises services.OnTaskWorkflowStoreError: if the data frame cannot be
        stored. The exception carries the column in ``to_delete`` and is
        chained to the original error.
    """
    # Save the column object attached to the form and add additional fields
    column.workflow = workflow
    column.is_key = False

    # Detect the case of a single integer as initial value so that it is
    # expanded
    # NOTE(review): the except clause also swallows ValueError/TypeError/
    # IndexError raised by set_categories (and would swallow the explicit
    # raise below if that exception derives from one of them) -- confirm
    # this best-effort behaviour is intended.
    try:
        int_value = int(column.categories[0])
        if int_value <= 1:
            raise services.OnTaskWorkflowIntegerLowerThanOne(
                field_name='values',
                message=_('The integer value has to be larger than 1'))
        column.set_categories(list(range(1, int_value + 1)))
    except (ValueError, TypeError, IndexError):
        # First category missing or not an integer: keep the categories
        pass

    column.save()

    # Empty new column
    new_column = [None] * workflow.nrows

    # Create the random partitions
    partitions = _partition(
        list(range(workflow.nrows)),
        len(column.categories))

    # Assign values to partitions
    for idx, indexes in enumerate(partitions):
        for col_idx in indexes:
            new_column[col_idx] = column.categories[idx]

    # Assign the new column to the data frame
    data_frame[column.name] = new_column

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        # Chain explicitly so the storage error stays attached as the cause
        raise services.OnTaskWorkflowStoreError(
            message=_('Unable to add the column: {0}').format(str(exc)),
            to_delete=[column]) from exc

    workflow.ncols = workflow.columns.count()
    workflow.save()

    # Log the event
    column.log(user, models.Log.COLUMN_ADD_RANDOM)