def restrict_column(user, column: models.Column):
    """Restrict a column to the set of values it currently contains.

    Loads the data frame of the workflow the column belongs to, collects the
    distinct non-null values stored under the column name and installs them
    as the categories of the column. The event is then logged and the
    query builder operands of the workflow are rebuilt and saved.

    :param user: User executing this action (passed to the log entry)
    :param column: Column object to restrict
    :return: Nothing. The column and its workflow are modified.
    :raises errors.OnTaskColumnCategoryValueError: If the column holds no
        non-null values.
    """
    data_frame = pandas.load_table(
        column.workflow.get_data_frame_table_name())

    # De-duplicate keeping the order of first appearance. A plain set has an
    # arbitrary iteration order (for strings it even changes between
    # interpreter runs), which made the stored category list unstable.
    cat_values = list(dict.fromkeys(data_frame[column.name].dropna()))
    if not cat_values:
        raise errors.OnTaskColumnCategoryValueError(
            message=_('The column has no meaningful values'))

    # Set categories and record the operation
    column.set_categories(cat_values)
    column.log(user, models.Log.COLUMN_RESTRICT)

    # Re-evaluate the operands in the workflow
    column.workflow.set_query_builder_ops()
    column.workflow.save()
def workflow_restrict_column(column: Column) -> Optional[str]:
    """Restrict a column to the set of values it currently contains.

    Loads the data frame of the workflow the column belongs to, collects the
    distinct non-null values stored under the column name, installs them as
    the categories of the column and saves it. The query builder operands of
    the workflow are then rebuilt and the workflow saved.

    :param column: Column object to restrict
    :return: String with an error message, or None if correct
    """
    data_frame = load_table(column.workflow.get_data_frame_table_name())

    # De-duplicate keeping the order of first appearance. A plain set has an
    # arbitrary iteration order (for strings it even changes between
    # interpreter runs), which made the stored category list unstable.
    cat_values = list(dict.fromkeys(data_frame[column.name].dropna()))
    if not cat_values:
        # Column has no meaningful values. Nothing to do.
        return _('Column has no meaningful values')

    # Set categories
    column.set_categories(cat_values)
    column.save()

    # Re-evaluate the operands in the workflow
    column.workflow.set_query_builder_ops()
    column.workflow.save()

    # Correct execution
    return None
def do_clone_column_only(
    column: Column,
    new_workflow: Optional[Workflow] = None,
    new_name: Optional[str] = None,
) -> Column:
    """Create and save a copy of a column.

    :param column: Column to duplicate.
    :param new_workflow: Workflow the copy is linked to. When None, the
        workflow of the original column is used.
    :param new_name: Name given to the copy. When None, the name of the
        original column is used.
    :return: The newly saved Column object.
    """
    target_name = column.name if new_name is None else new_name
    target_workflow = (
        column.workflow if new_workflow is None else new_workflow)

    clone_fields = {
        'name': target_name,
        'description_text': column.description_text,
        'workflow': target_workflow,
        'data_type': column.data_type,
        'is_key': column.is_key,
        'position': column.position,
        'in_viz': column.in_viz,
        # Deep copy so the clone does not share the categories object
        'categories': copy.deepcopy(column.categories),
        'active_from': column.active_from,
        'active_to': column.active_to,
    }

    clone = Column(**clone_fields)
    clone.save()
    return clone
def add_random_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    data_frame: pd.DataFrame,
):
    """Add a column with randomly partitioned category values.

    The row indexes of the workflow are split with ``_partition`` into as
    many groups as the column has categories, and every row in a group
    receives the category with the same index as the group.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param data_frame: Data frame of the current workflow
    :return: Nothing. The data frame is extended and stored, and the
        workflow column count is updated.
    :raises services.OnTaskWorkflowStoreError: If the data frame cannot be
        stored.
    """
    # Start with an empty value for every row
    row_values = [None] * workflow.nrows
    categories = column.get_categories()

    # One group of row indexes per category
    groups = _partition(list(range(workflow.nrows)), len(categories))
    for group_idx, row_indexes in enumerate(groups):
        for row_idx in row_indexes:
            row_values[row_idx] = categories[group_idx]

    # Place the values in the data frame with the pandas type of the column
    pd_type = _ontask_type_to_pd_type[column.data_type]
    data_frame[column.name] = pd.Series(
        row_values,
        dtype=pd_type,
        index=data_frame.index)
    if column.data_type == 'datetime':
        localized = data_frame[column.name].dt.tz_localize(
            settings.TIME_ZONE)
        data_frame[column.name] = localized

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    workflow.refresh_from_db()

    # Store the data frame in the DB
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        raise services.OnTaskWorkflowStoreError(
            message=_('Unable to add the column: {0}').format(str(exc)),
            to_delete=[column])

    workflow.ncols = workflow.columns.count()
    workflow.save()

    # Log the event
    column.log(user, models.Log.COLUMN_ADD_RANDOM)
def update_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    old_name: str,
    old_position: int,
    action_column_item: Optional[models.Action] = None,
    action_column_event: Optional[str] = None,
):
    """Apply the changes made to a column to the workflow.

    :param user: User performing the operation
    :param workflow: Workflow being processed
    :param column: Column being modified (already carrying the new values)
    :param old_name: Previous name, used to detect renaming
    :param old_position: Previous position, used to detect repositioning
    :param action_column_item: Action/column item on which the event is
        logged, if any.
    :param action_column_event: Event to record on the action/column item
    :return: Nothing. Side effect in the workflow.
    """
    name_changed = old_name != column.name
    if name_changed:
        # Propagate the new name to the stored table and the data frame
        table_name = workflow.get_data_frame_table_name()
        sql.db_rename_column(table_name, old_name, column.name)
        pandas.rename_df_column(workflow, old_name, column.name)

    if old_position != column.position:
        # Shift the columns affected by the move
        workflow.reposition_columns(old_position, column.position)

    column.save()

    # The prefetched columns are stale at this point, so fetch the workflow
    # again from the DB
    workflow_qs = models.Workflow.objects.prefetch_related('columns')
    workflow = workflow_qs.get(id=workflow.id)

    # Changes in a column require rebuilding the query_builder_ops
    workflow.set_query_builder_ops()
    workflow.save()

    if action_column_item:
        action_column_item.log(user, action_column_event)

    # Log the event
    column.log(user, models.Log.COLUMN_EDIT)
def clean(self):
    """Validate the initial value against the column type and categories.

    When an initial value is given it is converted with
    ``Column.validate_column_value`` and stored in
    ``self.initial_valid_value``. If the column instance has categories, the
    converted value must be one of them. Problems are reported as errors on
    the ``initial_value`` field.

    :return: The cleaned form data
    """
    form_data = super().clean()

    # Fields that failed their own validation are absent from the cleaned
    # data, so they are read with .get() instead of indexing.
    initial_value = form_data.get('initial_value')
    if not initial_value:
        return form_data

    if 'data_type' not in form_data:
        # The data type already has its own error; nothing to convert to.
        return form_data

    # See if the given value is allowed for the column data type
    try:
        self.initial_valid_value = Column.validate_column_value(
            form_data['data_type'],
            initial_value,
        )
    except ValueError:
        self.add_error(
            'initial_value',
            _('Incorrect initial value'),
        )
        # There is no converted value to compare with the categories, so
        # stop here instead of reporting a second, misleading error.
        return form_data

    categories = self.instance.get_categories()
    if categories and self.initial_valid_value not in categories:
        self.add_error(
            'initial_value',
            _('This value is not in the list of allowed values'),
        )

    return form_data
def add_formula_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    operation: str,
    selected_columns: List[models.Column],
):
    """Add a column computed from other columns of the workflow.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param operation: String selecting the function in ``_op_distrib``
    :param selected_columns: List of columns used as operands
    :return: Nothing. The column is added to the workflow and its data
        frame is stored.
    :raises services.OnTaskWorkflowAddColumn: If the data frame cannot be
        stored.
    """
    # Complete the column object attached to the form and save it
    column.workflow = workflow
    column.is_key = False
    column.save()

    # Load the current data and compute the values of the new column
    data_frame = pandas.load_table(workflow.get_data_frame_table_name())
    operand_names = [operand.name for operand in selected_columns]
    compute = _op_distrib[operation]
    data_frame[column.name] = compute(data_frame[operand_names])

    # The column type is derived from the dtype of the computed values
    dtype_name = data_frame[column.name].dtype.name
    column.data_type = pandas.datatype_names.get(dtype_name)

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    column.save()
    workflow.refresh_from_db()

    # Store the data frame in the DB
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add column: {0}').format(str(exc)),
            to_delete=[column])

    workflow.ncols = workflow.columns.count()
    workflow.save()

    column.log(user, models.Log.COLUMN_ADD_FORMULA)
def add_column_to_workflow(
    user,
    workflow: models.Workflow,
    column: models.Column,
    column_initial_value: Any,
    action_column_event: Optional[str] = None,
    action: Optional[models.Action] = None,
):
    """Insert a new column in the workflow and in its DB table.

    :param user: User executing the operation
    :param workflow: Used workflow
    :param column: Column to insert
    :param column_initial_value: Initial value for the column
    :param action_column_event: Event to record if the action/column needs
        to be stored
    :param action: If the column is a question, this is the action in which
        it is being used.
    :return: Nothing. Column added to the workflow and DB
    :raises services.OnTaskWorkflowAddColumn: If the column cannot be added
        to the DB table.
    """
    # Special case: integer type without initial value. Pandas encodes it
    # as NaN but a cycle through the database transforms it into a string.
    # To avoid this, integer + empty value => double
    no_initial_value = column_initial_value is None
    if no_initial_value and column.data_type == 'integer':
        column.data_type = 'double'

    # Fill in the remaining fields in the column
    column.workflow = workflow
    column.is_key = False

    # Make room for the column at its position
    workflow.reposition_columns(workflow.ncols + 1, column.position)

    # Save column, refresh workflow, and increase number of columns
    column.save()
    workflow.refresh_from_db()
    workflow.ncols += 1
    workflow.set_query_builder_ops()
    workflow.save()

    # Add the new column to the DB table
    table_name = workflow.get_data_frame_table_name()
    try:
        sql.add_column_to_db(
            table_name,
            column.name,
            column.data_type,
            initial=column_initial_value)
    except Exception as exc:
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add element: {0}').format(str(exc)),
            to_delete=[column])

    if action_column_event:
        # Record the event on the action/column tuple (created if needed)
        acc, __ = models.ActionColumnConditionTuple.objects.get_or_create(
            action=action,
            column=column,
            condition=None)
        acc.log(user, action_column_event)

    column.log(user, models.Log.COLUMN_ADD)
def _create_track_column(action: Action) -> str:
    """Create an additional integer column used for email tracking.

    The column is named ``EmailRead_<n>`` with the smallest ``n >= 1`` that
    does not collide with an existing column of the workflow. It is placed
    after the last column and added to the DB table with initial value 0.

    :param action: Action whose workflow receives the column
    :return: Name of the new column
    """
    workflow = action.workflow

    # Find the first suffix producing a name that is not in use
    existing_names = {col.name for col in workflow.columns.all()}
    suffix = 1
    while 'EmailRead_{0}'.format(suffix) in existing_names:
        suffix += 1
    track_col_name = 'EmailRead_{0}'.format(suffix)

    # Create the new column and store it
    now_str = simplify_datetime_str(
        datetime.datetime.now(pytz.timezone(settings.TIME_ZONE)))
    description = 'Emails sent with action {0} on {1}'.format(
        action.name,
        now_str,
    )
    column = Column(
        name=track_col_name,
        description_text=description,
        workflow=workflow,
        data_type='integer',
        is_key=False,
        position=workflow.ncols + 1,
    )
    column.save()

    # Increase the number of columns in the workflow
    workflow.ncols += 1
    workflow.save()

    # Add the column to the DB table
    add_column_to_db(
        workflow.get_data_frame_table_name(),
        track_col_name,
        'integer',
        0)

    return track_col_name
def create(self, validated_data, **kwargs):
    """Create and save a new column in the workflow given in the context.

    :param validated_data: Dictionary with the column fields. ``name`` and
        a legal ``data_type`` are required; the remaining fields fall back
        to defaults.
    :param kwargs: Additional arguments (unused)
    :return: The saved Column object
    :raises Exception: If the data type is not one of the values in
        ``pandas_datatype_names``, if the active window is inverted, or if
        building or saving the object fails.
    """
    # Preliminary checks
    data_type = validated_data.get('data_type')
    if (
        data_type is None
        or data_type not in pandas_datatype_names.values()
    ):
        # The data type is not legal
        raise Exception(_('Incorrect data type {0}.').format(data_type))

    column_obj = None
    try:
        # Create the object, but point to the given workflow
        column_obj = Column(
            name=validated_data['name'],
            description_text=validated_data.get('description_text', ''),
            workflow=self.context['workflow'],
            data_type=data_type,
            is_key=validated_data.get('is_key', False),
            position=validated_data.get('position', 0),
            in_viz=validated_data.get('in_viz', True),
            active_from=validated_data.get('active_from'),
            active_to=validated_data.get('active_to'),
        )

        # Set the categories if they exist
        column_obj.set_categories(
            validated_data.get('categories', []),
            True)

        if (
            column_obj.active_from
            and column_obj.active_to
            and column_obj.active_from > column_obj.active_to
        ):
            raise Exception(
                _('Incorrect date/times in the active window for '
                  + 'column {0}').format(validated_data['name']))

        # All tests passed, proceed to save the object.
        column_obj.save()
    except Exception:
        # Only an object that reached the database can be deleted. Calling
        # delete() on an unsaved instance raises and would hide the
        # original error, so check the primary key first.
        if column_obj is not None and column_obj.pk is not None:
            column_obj.delete()
        # Bare raise keeps the original exception and traceback
        raise

    return column_obj
def clean(self):
    """Validate the name, categories, active window and position.

    The data frame of the workflow is loaded and left in
    ``self.data_frame`` for future use. Validation stops at the first name
    or category problem, after recording the error on the field.

    :return: The cleaned form data
    """
    form_data = super().clean()

    # Load the data frame from the DB for various checks and leave it in
    # the form for future use
    table_name = self.workflow.get_data_frame_table_name()
    self.data_frame = load_table(table_name)

    # Column name must be a legal variable name and must not be in use
    if 'name' in self.changed_data:
        new_name = form_data['name']

        error_msg = is_legal_name(new_name)
        if error_msg:
            self.add_error('name', error_msg)
            return form_data

        name_in_use = any(
            other.id != self.instance.id and other.name == new_name
            for other in self.workflow.columns.all())
        if name_in_use:
            # New column name collides with existing one
            self.add_error(
                'name',
                _('There is a column already with this name'))
            return form_data

    # Categories must be valid types
    if 'raw_categories' in self.changed_data:
        raw_categories = form_data['raw_categories']
        valid_values = []

        if raw_categories:
            # Condition 1: Values must be valid for the type of the column.
            # An interval (when allowed) expands to every integer in it.
            interval = None
            if self.allow_interval_as_initial:
                interval = re.search(INTERVAL_PATTERN, raw_categories)

            if interval:
                from_val = int(interval.group('from'))
                to_val = int(interval.group('to'))
                low, high = sorted((from_val, to_val))
                category_values = [
                    str(val) for val in range(low, high + 1)]
            else:
                category_values = [
                    cat.strip() for cat in raw_categories.split(',')]

            try:
                valid_values = Column.validate_column_values(
                    form_data['data_type'],
                    category_values)
            except ValueError:
                self.add_error(
                    'raw_categories',
                    _('Incorrect list of values'),
                )
                return form_data

            # Condition 2: The values in the data frame column must be in
            # these categories (only if the column is being edited).
            # Falsy and null values in the data frame are not checked.
            if self.instance.name:
                has_foreign_value = any(
                    vval not in valid_values
                    for vval in self.data_frame[self.instance.name]
                    if vval and not pd.isnull(vval))
                if has_foreign_value:
                    self.add_error(
                        'raw_categories',
                        _(
                            'The values in the column are not compatible '
                            + ' with these ones.'))
                    return form_data

        self.instance.set_categories(valid_values)

    # Check the datetimes. One needs to be after the other
    a_from = self.cleaned_data.get('active_from')
    a_to = self.cleaned_data.get('active_to')
    if a_from and a_to and a_from >= a_to:
        for field_name in ('active_from', 'active_to'):
            self.add_error(field_name, _('Incorrect date/time window'))

    # Check and force a correct column index: anything outside 1..ncols is
    # moved to the position after the last column
    ncols = self.workflow.columns.count()
    if not 1 <= form_data['position'] <= ncols:
        form_data['position'] = ncols + 1

    return form_data
def delete_column(
    user: get_user_model(),
    workflow: models.Workflow,
    column: models.Column,
    cond_to_delete: Optional[List[models.Condition]] = None,
):
    """Remove column from ontask.workflow.

    Given a workflow and a column, removes the column from the workflow and
    from the DB table storing the data frame. Conditions that use the
    column are deleted, actions that lose their filter get their selected
    rows recomputed, and views left without columns are deleted.

    :param user: User performing the operation
    :param workflow: Workflow object
    :param column: Column object to delete
    :param cond_to_delete: List of conditions to delete after removing the
        column. When empty or None, the conditions of the workflow that
        use the column are calculated.
    :return: Nothing. Effect reflected in the database
    """
    column.log(user, models.Log.COLUMN_DELETE)

    if not cond_to_delete:
        # The conditions to delete are not given, so calculate them. This
        # has to happen BEFORE the column is deleted: once deleted, the
        # instance has no primary key and its relation rows are gone, so
        # the membership test below would never match.
        cond_to_delete = [
            cond
            for cond in models.Condition.objects.filter(
                action__workflow=workflow)
            if column in cond.columns.all()
        ]

    # Drop the column from the DB table storing the data frame
    sql.df_drop_column(workflow.get_data_frame_table_name(), column.name)

    # Reposition the columns above the one being deleted
    workflow.reposition_columns(column.position, workflow.ncols + 1)

    # Delete the column
    column.delete()

    # Update the information in the workflow
    workflow.ncols = workflow.ncols - 1
    workflow.save()

    # If a column disappears, the conditions that contain that variable
    # are removed
    actions_without_filters = []
    for condition in cond_to_delete:
        if condition.is_filter:
            actions_without_filters.append(condition.action)

        # Formula has the name of the deleted column. Delete it
        condition.delete()

    # Traverse the actions for which the filter has been deleted and
    # reassess all their conditions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in actions_without_filters:
        act.update_n_rows_selected()

    # If a column disappears, the views that contain only that column need
    # to disappear as well as they are no longer relevant.
    for view in workflow.views.all():
        if view.columns.count() == 0:
            view.delete()
def add_random_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    data_frame: pd.DataFrame,
):
    """Add a column with randomly partitioned category values.

    If the first category of the column can be read as an integer N, the
    categories are replaced by the values 1..N. The row indexes of the
    workflow are then split with ``_partition`` into as many groups as
    categories, and every row in a group receives the category with the
    same index as the group.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param data_frame: Data frame of the current workflow
    :return: Nothing. The column is saved, the data frame extended and
        stored, and the workflow column count updated.
    :raises services.OnTaskWorkflowIntegerLowerThanOne: If the integer
        given as first category is 1 or lower.
    :raises services.OnTaskWorkflowStoreError: If the data frame cannot be
        stored.
    """
    # Complete the column object attached to the form
    column.workflow = workflow
    column.is_key = False

    # Detect an integer as initial value so that it is expanded to 1..N.
    # Categories that cannot be read as an integer are left untouched.
    # NOTE(review): only the first category is inspected; confirm callers
    # never pass several categories starting with an integer.
    try:
        int_value = int(column.categories[0])
        if int_value <= 1:
            raise services.OnTaskWorkflowIntegerLowerThanOne(
                field_name='values',
                message=_('The integer value has to be larger than 1'))
        column.set_categories(list(range(1, int_value + 1)))
    except (ValueError, TypeError, IndexError):
        pass

    column.save()

    # Start with an empty value for every row
    row_values = [None] * workflow.nrows

    # One group of row indexes per category
    groups = _partition(
        list(range(workflow.nrows)),
        len(column.categories))
    for group_idx, row_indexes in enumerate(groups):
        for row_idx in row_indexes:
            row_values[row_idx] = column.categories[group_idx]

    # Assign the new column to the data frame
    data_frame[column.name] = row_values

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    workflow.refresh_from_db()

    # Store the data frame in the DB
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        raise services.OnTaskWorkflowStoreError(
            message=_('Unable to add the column: {0}').format(str(exc)),
            to_delete=[column])

    workflow.ncols = workflow.columns.count()
    workflow.save()

    # Log the event
    column.log(user, models.Log.COLUMN_ADD_RANDOM)