Ejemplo n.º 1
0
def restrict_column(user, column: models.Column):
    """Limit the categories of a column to the values it currently holds.

    Loads the data frame of the workflow owning the column, collects the
    distinct non-null values stored under the column name and hands them to
    ``column.set_categories``. The operation is then logged and the query
    builder operands of the workflow are rebuilt and saved.

    :param user: User executing this action
    :param column: Column object to restrict
    :return: Nothing. Side effects in the column and its workflow.
    :raises errors.OnTaskColumnCategoryValueError: if the column has no
    non-null value to use as category.
    """
    table_name = column.workflow.get_data_frame_table_name()
    column_values = pandas.load_table(table_name)[column.name]

    # Null entries are discarded so they never become a category
    allowed_values = set(column_values.dropna())
    if len(allowed_values) == 0:
        raise errors.OnTaskColumnCategoryValueError(
            message=_('The column has no meaningful values'))

    column.set_categories(list(allowed_values))
    column.log(user, models.Log.COLUMN_RESTRICT)

    # The new categories require rebuilding the query builder operands
    workflow = column.workflow
    workflow.set_query_builder_ops()
    workflow.save()
Ejemplo n.º 2
0
def add_column_to_workflow(
    user,
    workflow: models.Workflow,
    column: models.Column,
    column_initial_value: Any,
    action_column_event: Optional[str] = None,
    action: Optional[models.Action] = None,
):
    """Insert a new column in a workflow and in its data table.

    The column is attached to the workflow as a non-key column, the existing
    columns are repositioned, the column is saved and the workflow column
    count and query builder operands are updated. The column is then added
    to the database table storing the data frame.

    :param user: User executing the operation
    :param workflow: Workflow receiving the column
    :param column: Column to insert
    :param column_initial_value: Initial value for the column
    :param action_column_event: Event to record on the action/column tuple.
    Nothing is recorded when it is empty.
    :param action: If the column is a question, this is the action in which
    it is being used.
    :return: Nothing. Column added to the workflow and the DB table.
    :raises services.OnTaskWorkflowAddColumn: if the column cannot be added
    to the DB table. The exception carries the column in ``to_delete``.
    """
    # An integer column with no initial value is encoded by Pandas as NaN,
    # and a cycle through the database turns it into a string. Storing the
    # column as double avoids that conversion.
    if column_initial_value is None and column.data_type == 'integer':
        column.data_type = 'double'

    # Complete the column definition
    column.is_key = False
    column.workflow = workflow

    # Make room for the new column among the existing ones
    workflow.reposition_columns(workflow.ncols + 1, column.position)

    # Persist the column, reload the workflow and account for the new column
    column.save()
    workflow.refresh_from_db()
    workflow.ncols = workflow.ncols + 1
    workflow.set_query_builder_ops()
    workflow.save()

    # Create the column in the table storing the data frame
    try:
        sql.add_column_to_db(
            workflow.get_data_frame_table_name(),
            column.name,
            column.data_type,
            initial=column_initial_value)
    except Exception as exc:
        error_message = _('Unable to add element: {0}').format(str(exc))
        raise services.OnTaskWorkflowAddColumn(
            message=error_message,
            to_delete=[column])

    if action_column_event:
        # Record the event in the tuple linking the action and the column
        acc_tuple = models.ActionColumnConditionTuple.objects.get_or_create(
            action=action,
            column=column,
            condition=None)[0]
        acc_tuple.log(user, action_column_event)

    column.log(user, models.Log.COLUMN_ADD)
Ejemplo n.º 3
0
def add_random_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    data_frame: pd.DataFrame,
):
    """Add a column with randomly distributed category values.

    The row indexes of the workflow are split with ``_partition`` into as
    many groups as categories the column has, and every row receives the
    category of the group it belongs to. The resulting values are stored in
    the data frame under the column name and the data frame is stored back
    in the workflow.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param data_frame: Data frame of the current workflow
    :return: Nothing. Column is added to the data frame and the workflow.
    :raises services.OnTaskWorkflowStoreError: if the data frame cannot be
    stored. The exception carries the column in ``to_delete``.
    """
    n_rows = workflow.nrows

    # One slot per row, filled below with the assigned category
    row_values = [None] * n_rows
    categories = column.get_categories()

    # Split the row indexes in one group per category
    groups = _partition(list(range(n_rows)), len(categories))

    # Every row takes the category matching the position of its group
    for group_position, row_indexes in enumerate(groups):
        for row_index in row_indexes:
            row_values[row_index] = categories[group_position]

    # Insert the values in the data frame with the type of the column
    pd_type = _ontask_type_to_pd_type[column.data_type]
    data_frame[column.name] = pd.Series(
        row_values,
        dtype=pd_type,
        index=data_frame.index)

    if column.data_type == 'datetime':
        # Attach the configured time zone to the datetime values
        localized = data_frame[column.name].dt.tz_localize(
            settings.TIME_ZONE)
        data_frame[column.name] = localized

    # Make room for the new column among the existing ones
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    workflow.refresh_from_db()

    # Persist the modified data frame
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        error_message = _('Unable to add the column: {0}').format(str(exc))
        raise services.OnTaskWorkflowStoreError(
            message=error_message,
            to_delete=[column])

    # Recount the columns now attached to the workflow
    workflow.ncols = workflow.columns.count()
    workflow.save()

    column.log(user, models.Log.COLUMN_ADD_RANDOM)
Ejemplo n.º 4
0
def update_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    old_name: str,
    old_position: int,
    action_column_item: Optional[models.Action] = None,
    action_column_event: Optional[str] = None,
):
    """Apply the changes made to a column.

    A change of name is propagated to the DB table and the data frame, a
    change of position triggers the repositioning of the workflow columns.
    The column is then saved, the workflow is fetched again to rebuild its
    query builder operands and the operation is logged.

    :param user: User performing the operation
    :param workflow: Workflow being processed
    :param column: Column being modified
    :param old_name: Name before the edit, used to detect renaming
    :param old_position: Position before the edit, used to detect
    repositioning
    :param action_column_item: Action/column item in case the action is
    stored in that table.
    :param action_column_event: Event to record the operation on the
    action/column
    :return: Nothing. Side effect in the workflow.
    """
    was_renamed = old_name != column.name
    if was_renamed:
        # Propagate the new name to the DB table and the data frame
        table_name = workflow.get_data_frame_table_name()
        sql.db_rename_column(table_name, old_name, column.name)
        pandas.rename_df_column(workflow, old_name, column.name)

    was_moved = old_position != column.position
    if was_moved:
        # Shift the columns affected by the change of position
        workflow.reposition_columns(old_position, column.position)

    column.save()

    # The prefetched columns are no longer valid, so the workflow is
    # obtained again from the DB
    workflow_query = models.Workflow.objects.prefetch_related('columns')
    fresh_workflow = workflow_query.get(id=workflow.id)

    # A change in a column requires rebuilding the query_builder_ops
    fresh_workflow.set_query_builder_ops()
    fresh_workflow.save()

    if action_column_item:
        action_column_item.log(user, action_column_event)

    column.log(user, models.Log.COLUMN_EDIT)
Ejemplo n.º 5
0
def add_formula_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    operation: str,
    selected_columns: List[models.Column],
):
    """Add a column computed from other columns of the workflow.

    The column is attached to the workflow and saved, the data frame is
    loaded, and the function registered in ``_op_distrib`` for the operation
    is applied to the selected columns to obtain the new values. The column
    data type is derived from the resulting values before the data frame is
    stored back.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param operation: String denoting the operation
    :param selected_columns: List of columns selected for the operation.
    :return: Nothing. Column is added to the workflow.
    :raises services.OnTaskWorkflowAddColumn: if the data frame cannot be
    stored. The exception carries the column in ``to_delete``.
    """
    # Complete the column attached to the form and persist it
    column.workflow = workflow
    column.is_key = False
    column.save()

    # Obtain the current data frame
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Compute the new values from the selected columns
    operand_names = [operand.name for operand in selected_columns]
    compute = _op_distrib[operation]
    df[column.name] = compute(df[operand_names])

    # The column type is the one matching the dtype of the computed values
    result_dtype_name = df[column.name].dtype.name
    column.data_type = pandas.datatype_names.get(result_dtype_name)

    # Make room for the new column among the existing ones
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    column.save()
    workflow.refresh_from_db()

    # Persist the modified data frame
    try:
        pandas.store_dataframe(df, workflow)
    except Exception as exc:
        error_message = _('Unable to add column: {0}').format(str(exc))
        raise services.OnTaskWorkflowAddColumn(
            message=error_message,
            to_delete=[column])

    # Recount the columns now attached to the workflow
    workflow.ncols = workflow.columns.count()
    workflow.save()

    column.log(user, models.Log.COLUMN_ADD_FORMULA)
Ejemplo n.º 6
0
def delete_column(
    user: get_user_model(),
    workflow: models.Workflow,
    column: models.Column,
    cond_to_delete: Optional[List[models.Condition]] = None,
):
    """Remove column from ontask.workflow.

    Given a workflow and a column, removes it from the workflow (and the
    corresponding data frame). The conditions using the column are deleted,
    the actions that lose their filter re-evaluate their selected rows, and
    the views left without columns are deleted.

    :param user: User performing the operation
    :param workflow: Workflow object
    :param column: Column object to delete
    :param cond_to_delete: List of conditions to delete after removing the
    column. If empty or not given, the conditions of the workflow actions
    that use the column are calculated.
    :return: Nothing. Effect reflected in the database
    """
    column.log(user, models.Log.COLUMN_DELETE)

    if not cond_to_delete:
        # The conditions to delete are not given, so calculate them.
        # This has to be done BEFORE the column is deleted: once deleted,
        # Django clears the primary key of the instance and removes its
        # many-to-many rows, so the membership test below would never match.
        cond_to_delete = [
            cond for cond in models.Condition.objects.filter(
                action__workflow=workflow)
            if column in cond.columns.all()
        ]

    # Drop the column from the DB table storing the data frame
    sql.df_drop_column(workflow.get_data_frame_table_name(), column.name)

    # Reposition the columns above the one being deleted
    workflow.reposition_columns(column.position, workflow.ncols + 1)

    # Delete the column
    column.delete()

    # Update the information in the workflow
    workflow.ncols -= 1
    workflow.save()

    # If a column disappears, the conditions that contain that variable
    # are removed
    actions_without_filters = []
    for condition in cond_to_delete:
        if condition.is_filter:
            # Remember the action so its selected rows are reassessed below
            actions_without_filters.append(condition.action)

        # Formula has the name of the deleted column. Delete it
        condition.delete()

    # Traverse the actions for which the filter has been deleted and reassess
    # all their conditions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in actions_without_filters:
        act.update_n_rows_selected()

    # If a column disappears, the views that contain only that column need to
    # disappear as well as they are no longer relevant.
    for view in workflow.views.all():
        if view.columns.count() == 0:
            view.delete()
Ejemplo n.º 7
0
def add_random_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    data_frame: pd.DataFrame,
):
    """Add a column with randomly distributed category values.

    The column is attached to the workflow and saved. If its categories
    consist of a single value that can be read as an integer N, they are
    replaced by the values 1 to N. The row indexes are then split with
    ``_partition`` into as many groups as categories, every row receives the
    category of its group, and the data frame is stored back in the workflow.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param data_frame: Data frame of the current workflow
    :return: Nothing. Column is added to the data frame and the workflow.
    :raises services.OnTaskWorkflowIntegerLowerThanOne: if the single integer
    given as categories is not larger than 1.
    :raises services.OnTaskWorkflowStoreError: if the data frame cannot be
    stored. The exception carries the column in ``to_delete``.
    """
    # Save the column object attached to the form and add additional fields
    column.workflow = workflow
    column.is_key = False

    # Detect the case of a single integer as initial value so that it is
    # expanded. Only a lone value qualifies: a list with several categories
    # is taken literally even if its first element looks like an integer.
    try:
        if len(column.categories) == 1:
            int_value = int(column.categories[0])
            if int_value <= 1:
                raise services.OnTaskWorkflowIntegerLowerThanOne(
                    field_name='values',
                    message=_('The integer value has to be larger than 1'))
            column.set_categories(list(range(1, int_value + 1)))
    except (ValueError, TypeError, IndexError):
        # The value is not an integer: keep the categories as given
        pass

    column.save()

    # Empty new column
    new_column = [None] * workflow.nrows
    # Create the random partitions, one per category
    partitions = _partition(
        list(range(workflow.nrows)),
        len(column.categories))

    # Assign values to partitions
    for idx, indexes in enumerate(partitions):
        for col_idx in indexes:
            new_column[col_idx] = column.categories[idx]

    # Assign the new column to the data frame
    data_frame[column.name] = new_column

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(data_frame, workflow)
    except Exception as exc:
        # Chain the original error so the root cause stays in the traceback
        raise services.OnTaskWorkflowStoreError(
            message=_('Unable to add the column: {0}').format(str(exc)),
            to_delete=[column]) from exc

    # Recount the columns now attached to the workflow
    workflow.ncols = workflow.columns.count()
    workflow.save()

    # Log the event
    column.log(user, models.Log.COLUMN_ADD_RANDOM)