Example #1
def _get_df_and_columns(
    workflow: Workflow,
    pk: int,
) -> Optional[Tuple[DataFrame, List, View]]:
    """Get the DF and the columns to process.

    :param workflow: Workflow object
    :param pk: Optional id for a view
    :return: Tuple (DataFrame, list of columns, View); None if the view
    is not found
    """
    # If a view is given, filter the columns
    view = None
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found; the caller redirects to the workflow detail
            return None
        columns_to_view = view.columns.filter(is_key=False)

        df = load_table(workflow.get_data_frame_table_name(),
                        [col.name for col in columns_to_view], view.formula)
    else:
        # No view given, fetch the entire data frame
        columns_to_view = workflow.columns.filter(is_key=False)
        df = load_table(workflow.get_data_frame_table_name(),
                        [col.name for col in columns_to_view])

    return df, columns_to_view, view
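A minimal caller sketch (the view function and redirect target are
illustrative assumptions, not part of the source):

from django.shortcuts import redirect

def stat_view(request, workflow, view_pk=None):
    # Hypothetical caller: unpack the tuple and handle the error case.
    result = _get_df_and_columns(workflow, view_pk)
    if result is None:
        # View not found: fall back to the workflow detail page.
        return redirect('workflow:detail', workflow.id)
    df, columns_to_view, view = result
    ...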
Example #2
def _do_clone_workflow(workflow: Workflow) -> Workflow:
    """Clone the workflow.

    :param workflow: source workflow

    :return: Clone object
    """
    new_workflow = Workflow(
        user=workflow.user,
        name=create_new_name(
            workflow.name,
            Workflow.objects.filter(
                Q(user=workflow.user) | Q(shared=workflow.user)),
        ),
        description_text=workflow.description_text,
        nrows=workflow.nrows,
        ncols=workflow.ncols,
        attributes=copy.deepcopy(workflow.attributes),
        query_builder_ops=copy.deepcopy(workflow.query_builder_ops),
        luser_email_column_md5=workflow.luser_email_column_md5,
        lusers_is_outdated=workflow.lusers_is_outdated)
    new_workflow.save()

    try:
        new_workflow.shared.set(list(workflow.shared.all()))
        new_workflow.lusers.set(list(workflow.lusers.all()))

        # Clone the columns
        for item_obj in workflow.columns.all():
            do_clone_column_only(item_obj, new_workflow=new_workflow)

        # Update the luser_email_column if needed:
        if workflow.luser_email_column:
            new_workflow.luser_email_column = new_workflow.columns.get(
                name=workflow.luser_email_column.name, )

        # Clone the DB table
        clone_table(workflow.get_data_frame_table_name(),
                    new_workflow.get_data_frame_table_name())

        # Clone actions
        for item_obj in workflow.actions.all():
            do_clone_action(item_obj, new_workflow)

        for item_obj in workflow.views.all():
            do_clone_view(item_obj, new_workflow)

        # Done!
        new_workflow.save()
    except Exception as exc:
        new_workflow.delete()
        raise exc

    return new_workflow
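The try/except implements a manual rollback: if any step of the deep copy
fails, the partially built clone is deleted before the exception
propagates. The same pattern in isolation (all names are illustrative):

def clone_with_rollback(make_clone, clone_steps):
    # make_clone returns a saved, empty copy; each step copies one
    # related collection. On any failure, remove the partial clone.
    new_obj = make_clone()
    try:
        for step in clone_steps:
            step(new_obj)
        new_obj.save()
    except Exception:
        new_obj.delete()
        raise
    return new_obj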
Example #3
def perform_row_delete(workflow: models.Workflow, row_key: str,
                       row_value: Any):
    """Perform the row deletion operation.

    :param workflow: Workflow being considered
    :param row_key: Name of the key column to select the row
    :param row_value: Value in the key column used to select the row.
    :return: Nothing. The result is reflected in the workflow.
    """
    # if there is no key or value, flag the message and return to table
    # view
    if not row_key or not row_value:
        # The response will require going to the table display anyway
        raise OnTaskTableNoKeyValueError(
            message=_('Incorrect URL invoked to delete a row'))

    # Proceed to delete the row
    sql.delete_row(workflow.get_data_frame_table_name(), (row_key, row_value))

    # Update rowcount
    workflow.nrows -= 1
    workflow.save(update_fields=['nrows'])

    # Update the value of all the conditions in the actions
    # TODO: Explore how to do this asynchronously (or lazy)
    for action in workflow.actions.all():
        action.update_n_rows_selected()
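A sketch of the Django view wiring that would invoke this service (URL
names and parameter sources are assumptions):

from django.contrib import messages
from django.shortcuts import redirect
from django.urls import reverse

def row_delete(request, workflow):
    # Hypothetical endpoint: pull key/value from the request and
    # surface the service exception as a user-facing message.
    try:
        perform_row_delete(
            workflow,
            request.GET.get('key'),
            request.GET.get('value'))
    except OnTaskTableNoKeyValueError as exc:
        messages.error(request, str(exc))
    return redirect(reverse('table:display'))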
Example #4
def do_clone_workflow(user, workflow: models.Workflow) -> models.Workflow:
    """Clone a workflow.

    :param user: User performing the operation
    :param workflow: source workflow
    :return: Cloned object
    """
    new_workflow = models.Workflow(
        user=workflow.user,
        name=create_new_name(
            workflow.name,
            models.Workflow.objects.filter(
                Q(user=workflow.user) | Q(shared=workflow.user)),
        ),
        description_text=workflow.description_text,
        nrows=workflow.nrows,
        ncols=workflow.ncols,
        attributes=copy.deepcopy(workflow.attributes),
        query_builder_ops=copy.deepcopy(workflow.query_builder_ops),
        luser_email_column_md5=workflow.luser_email_column_md5,
        lusers_is_outdated=workflow.lusers_is_outdated)
    new_workflow.save()

    try:
        new_workflow.shared.set(list(workflow.shared.all()))
        new_workflow.lusers.set(list(workflow.lusers.all()))

        # Clone the columns
        for item_obj in workflow.columns.all():
            do_clone_column_only(item_obj, new_workflow=new_workflow)

        # Update the luser_email_column if needed:
        if workflow.luser_email_column:
            new_workflow.luser_email_column = new_workflow.columns.get(
                name=workflow.luser_email_column.name, )

        # Clone the DB table
        sql.clone_table(workflow.get_data_frame_table_name(),
                        new_workflow.get_data_frame_table_name())

        # Clone actions
        for item_obj in workflow.actions.all():
            do_clone_action(user, item_obj, new_workflow)

        for item_obj in workflow.views.all():
            do_clone_view(user, item_obj, new_workflow)

        # Done!
        new_workflow.save()
    except Exception as exc:
        raise OnTaskServiceException(
            message=_('Error while cloning workflow: {0}').format(exc),
            to_delete=[new_workflow])

    workflow.log(user,
                 models.Log.WORKFLOW_CLONE,
                 id_old=workflow.id,
                 name_old=workflow.name)

    return new_workflow
Example #5
def update_row_values(
    workflow: models.Workflow,
    update_key: str,
    update_val: Any,
    row_values: List[Any],
):
    """Update a row with the information in the form.

    :param workflow: Workflow object being processed.
    :param update_key: Column name to use to select row.
    :param update_val: Column value to select row.
    :param row_values: List of values to store in the row
    :return: Nothing.
    """
    # Create the query to update the row
    column_names = [col.name for col in workflow.columns.all()]
    with transaction.atomic():
        # Update the row in the db
        sql.update_row(workflow.get_data_frame_table_name(),
                       column_names,
                       row_values,
                       filter_dict={update_key: update_val})
        # verify that the "key" property is maintained in all the
        # columns.
        checks.check_key_columns(workflow)

    # Recompute all the values of the conditions in each of the actions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in workflow.actions.all():
        act.update_n_rows_selected()
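Wrapping the UPDATE and check_key_columns in one transaction.atomic block
means a failed key check (which raises) rolls the row update back. The
pattern in isolation, under that assumption:

from django.db import transaction

def write_then_verify(write_op, verify_op):
    # If verify_op raises, Django rolls back everything done by
    # write_op inside this atomic block.
    with transaction.atomic():
        write_op()
        verify_op()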
Example #6
def create_row(workflow: models.Workflow, row_values: List[Any]):
    """Create an additional row with the information in the form.

    :param workflow: Workflow object being processed.
    :param row_values: List of values to store.
    :return: Nothing.
    """
    # Create the query to update the row
    columns = workflow.columns.all()
    column_names = [col.name for col in columns]

    with transaction.atomic():
        # Insert the new row in the db
        sql.insert_row(workflow.get_data_frame_table_name(), column_names,
                       row_values)
        # verify that the "key" property is maintained in all the
        # columns.
        checks.check_key_columns(workflow)

    # Update number of rows
    workflow.nrows += 1
    workflow.save()

    # Recompute all the values of the conditions in each of the actions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in workflow.actions.all():
        act.update_n_rows_selected()
Example #7
def _get_column_visualization_items(
    workflow: Workflow,
    column: Column,
):
    """Get the visualization items (scripts and HTML) for a column.

    :param workflow: Workflow being processed

    :param column: Column requested

    :return: Tuple stat_data with descriptive stats, visualization scripts and
    visualization HTML
    """
    # Get the dataframe
    df = load_table(workflow.get_data_frame_table_name(), [column.name])

    # Extract the data to show at the top of the page
    stat_data = get_column_statistics(df[column.name])

    vis_scripts = []
    visualizations = _get_column_visualisations(
        column,
        df[[column.name]],
        vis_scripts,
        context={
            'style': 'width:100%; height:100%; display:inline-block;'
        },
    )

    return stat_data, vis_scripts, visualizations
Example #8
def add_column_to_workflow(
    user,
    workflow: models.Workflow,
    column: models.Column,
    column_initial_value: Any,
    action_column_event: Optional[str] = None,
    action: Optional[models.Action] = None,
):
    """Add a new column to the workflow.

    :param user: User executing the operation
    :param workflow: Used workflow
    :param column: Column to insert
    :param column_initial_value: Initial value for the column
    :param action_column_event: Event to record if the action/column needs to
    be stored
    :param action: If the column is a question, this is the action in which it
    is being used.
    :return: Nothing. Column added to Wflow and DB
    """
    # Catch the special case of integer type and no initial value. Pandas
    # encodes it as NaN but a cycle through the database transforms it into
    # a string. To avoid this case, integer + empty value => double
    if column.data_type == 'integer' and column_initial_value is None:
        column.data_type = 'double'

    # Fill in the remaining fields in the column
    column.workflow = workflow
    column.is_key = False

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)

    # Save column, refresh workflow, and increase number of columns
    column.save()
    workflow.refresh_from_db()
    workflow.ncols += 1
    workflow.set_query_builder_ops()
    workflow.save()

    # Add the new column to the DB
    try:
        sql.add_column_to_db(
            workflow.get_data_frame_table_name(),
            column.name,
            column.data_type,
            initial=column_initial_value)
    except Exception as exc:
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add element: {0}').format(str(exc)),
            to_delete=[column])

    if action_column_event:
        acc, __ = models.ActionColumnConditionTuple.objects.get_or_create(
            action=action,
            column=column,
            condition=None)
        acc.log(user, action_column_event)
    column.log(user, models.Log.COLUMN_ADD)
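The integer/None special case exists because pandas promotes an integer
column containing a missing value to float. A small illustration of the
underlying behaviour:

import pandas as pd

# A missing value forces the column to float64, which round-trips
# through the database as a double; hence 'integer' becomes 'double'.
series = pd.Series([1, 2, None])
print(series.dtype)  # float64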
Example #9
def update_luser_email_column(
    user,
    pk: int,
    workflow: models.Workflow,
    column: models.Column,
):
    """Update the field luser_email in the workflow.

    :param user: User making the request
    :param pk: Column ID to obtain the user id
    :param workflow: Workflow being manipulated.
    :param column: Column being used to update the luser field.
    :return: Nothing. Side effects stored in the workflow object.
    """
    if not pk:
        # Empty pk, means reset the field.
        workflow.luser_email_column = None
        workflow.luser_email_column_md5 = ''
        workflow.lusers.set([])
        workflow.save(
            update_fields=['luser_email_column', 'luser_email_column_md5'])
        return

    table_name = workflow.get_data_frame_table_name()

    # Get the column content
    emails = sql.get_rows(table_name, column_names=[column.name])

    # Verify that the column has a valid set of emails
    incorrect_email = get_incorrect_email([row[column.name] for row in emails])
    if incorrect_email:
        raise services.OnTaskWorkflowEmailError(message=_(
            'Incorrect email addresses "{0}".').format(incorrect_email))

    # Update the column
    workflow.luser_email_column = column
    workflow.save(update_fields=['luser_email_column'])

    # Calculate the MD5 value
    md5_hash = sql.get_text_column_hash(table_name, column.name)

    if workflow.luser_email_column_md5 == md5_hash:
        return

    # Change detected, run the update in batch mode
    workflow.luser_email_column_md5 = md5_hash
    workflow.save(update_fields=['luser_email_column_md5'])

    # Log the event with the status "preparing updating"
    log_item = workflow.log(user, models.Log.WORKFLOW_UPDATE_LUSERS)

    # Push the update of lusers to batch processing
    tasks.execute_operation.delay(
        operation_type=models.Log.WORKFLOW_UPDATE_LUSERS,
        user_id=user.id,
        workflow_id=workflow.id,
        log_id=log_item.id)
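The MD5 hash of the email column acts as a cheap change detector: the
expensive batch update is queued only when the recomputed hash differs
from the stored one. The pattern in isolation (a generic sketch; all
names are illustrative):

def refresh_if_changed(obj, hash_field, compute_hash, refresh):
    # Run refresh() only when the content hash has actually changed.
    new_hash = compute_hash()
    if getattr(obj, hash_field) == new_hash:
        return False
    setattr(obj, hash_field, new_hash)
    obj.save(update_fields=[hash_field])
    refresh()
    return True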
Example #10
def check_wf_df(workflow: models.Workflow) -> bool:
    """Check consistency between Workflow info and the data frame.

    Check the consistency between the information stored in the workflow
    and the structure of the underlying dataframe

    :param workflow: Workflow object
    :return: Boolean stating the result of the check. True: Correct.
    """
    # Get the df
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Set values in case there is no df
    if df is not None:
        dfnrows = df.shape[0]
        dfncols = df.shape[1]
        df_col_names = list(df.columns)
    else:
        dfnrows = 0
        dfncols = 0
        df_col_names = []

    # Check 1: Number of rows and columns
    assert workflow.nrows == dfnrows, 'Inconsistent number of rows'
    assert workflow.ncols == dfncols, 'Inconsistent number of columns'

    # Identical sets of columns
    wf_cols = workflow.columns.all()
    assert set(df_col_names) == {col.name for col in wf_cols}, (
        'Inconsistent set of columns')

    # Identical data types
    # for n1, n2 in zip(wf_cols, df_col_names):
    for col in wf_cols:
        df_dt = pandas.datatype_names.get(df[col.name].dtype.name)
        if col.data_type == 'boolean' and df_dt == 'string':
            # This is the case of a column with Boolean and Nulls
            continue

        assert col.data_type == df_dt, (
            'Inconsistent data type {0}'.format(col.name))

    # Verify that the columns marked as unique are preserved
    for col in workflow.columns.filter(is_key=True):
        assert pandas.is_unique_column(df[col.name]), (
            'Column {0} should be unique.'.format(col.name))

    # Columns are properly numbered
    cpos = workflow.columns.values_list('position', flat=True)
    rng = range(1, len(cpos) + 1)
    assert sorted(cpos) == list(rng), 'Inconsistent column positions'

    return True
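Since the function is assertion based, it slots naturally into a test
suite; a hedged usage sketch (the test class is an assumption):

from django.test import TestCase

class WorkflowConsistencyTest(TestCase):
    def test_workflow_consistency(self):
        # check_wf_df raises AssertionError on any mismatch between
        # the workflow metadata and the stored data frame.
        for workflow in models.Workflow.objects.all():
            self.assertTrue(check_wf_df(workflow))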
Example #11
def update_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    old_name: str,
    old_position: int,
    action_column_item: Optional[models.Action] = None,
    action_column_event: Optional[str] = None,
):
    """Update information in a column.

    :param user: User performing the operation
    :param workflow: Workflow being processed
    :param column: Column being modified
    :param old_name: Old name to detect renaming
    :param old_position: old position to detect repositioning
    :param action_column_item: Action/column item in case the action is stored
    in that table.
    :param action_column_event: Event to record the operation on the
    action/column
    :return: Nothing. Side effect in the workflow.
    """
    # If there is a new name, rename the data frame columns
    if old_name != column.name:
        sql.db_rename_column(
            workflow.get_data_frame_table_name(),
            old_name,
            column.name)
        pandas.rename_df_column(workflow, old_name, column.name)

    if old_position != column.position:
        # Update the positions of the appropriate columns
        workflow.reposition_columns(old_position, column.position)

    column.save()

    # Go back to the DB because the prefetch columns are not valid
    # any more
    workflow = models.Workflow.objects.prefetch_related('columns').get(
        id=workflow.id,
    )

    # Changes in column require rebuilding the query_builder_ops
    workflow.set_query_builder_ops()

    # Save the workflow
    workflow.save()

    if action_column_item:
        action_column_item.log(user, action_column_event)

    # Log the event
    column.log(user, models.Log.COLUMN_EDIT)
Example #12
def add_formula_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    operation: str,
    selected_columns: List[models.Column],
):
    """Add the formula column to the workflow.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param operation: string denoting the operation
    :param selected_columns: List of columns selected for the operation.
    :return: Column is added to the workflow
    """
    # Save the column object attached to the form and add additional fields
    column.workflow = workflow
    column.is_key = False

    # Save the instance
    column.save()

    # Update the data frame
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Add the column with the appropriate computation
    cnames = [col.name for col in selected_columns]
    df[column.name] = _op_distrib[operation](df[cnames])

    # Populate the column type
    column.data_type = pandas.datatype_names.get(
        df[column.name].dtype.name)

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    column.save()
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(df, workflow)
    except Exception as exc:
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add column: {0}').format(str(exc)),
            to_delete=[column])

    workflow.ncols = workflow.columns.count()
    workflow.save()
    column.log(user, models.Log.COLUMN_ADD_FORMULA)
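_op_distrib is not shown above; it presumably maps the operation name to
a function computed across the selected columns. A plausible sketch of
such a dispatch table (the exact set of operations is an assumption):

# Hypothetical dispatch table: each entry receives the sub-frame with
# the selected columns and returns the new column as a Series.
_op_distrib = {
    'sum': lambda df: df.sum(axis=1),
    'prod': lambda df: df.prod(axis=1),
    'max': lambda df: df.max(axis=1),
    'min': lambda df: df.min(axis=1),
    'mean': lambda df: df.mean(axis=1),
}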
Example #13
def check_luser_email_column_outdated(workflow: models.Workflow):
    """Detect if the luser_email column is up to date.

    :param workflow: Workflow being manipulated.
    :return: Side effect, md5_hash_is_outdated is updated.
    """
    if workflow.luser_email_column:
        md5_hash = sql.get_text_column_hash(
            workflow.get_data_frame_table_name(),
            workflow.luser_email_column.name)
        if md5_hash != workflow.luser_email_column_md5:
            # Information is outdated
            workflow.lusers_is_outdated = True
            workflow.save(update_fields=['lusers_is_outdated'])
Example #14
def upload_step_four(
    request: http.HttpRequest,
    workflow: models.Workflow,
    upload_data: Dict,
) -> http.HttpResponse:
    """Perform the merge operation.

    :param request: Received request
    :param workflow: Workflow being processed
    :param upload_data: Dictionary with all the information about the merge.
    :return: HttpResponse
    """
    # Get the dataframes to merge
    try:
        dst_df = pandas.load_table(workflow.get_data_frame_table_name())
        src_df = pandas.load_table(workflow.get_upload_table_name())
    except Exception:
        return render(request, 'error.html',
                      {'message': _('Exception while loading data frame')})

    try:
        pandas.perform_dataframe_upload_merge(workflow, dst_df, src_df,
                                              upload_data)
    except Exception as exc:
        # Nuke the temporary table
        sql.delete_table(workflow.get_upload_table_name())
        col_info = workflow.get_column_info()
        workflow.log(request.user,
                     models.Log.WORKFLOW_DATA_FAILEDMERGE,
                     column_names=col_info[0],
                     column_types=col_info[1],
                     column_unique=col_info[2],
                     error_message=str(exc))
        messages.error(request, _('Merge operation failed. ') + str(exc))
        return redirect(reverse('table:display'))

    col_info = workflow.get_column_info()
    workflow.log(request.user,
                 upload_data['log_upload'],
                 column_names=col_info[0],
                 column_types=col_info[1],
                 column_unique=col_info[2])
    store_workflow_in_session(request.session, workflow)
    request.session.pop('upload_data', None)

    return redirect(reverse('table:display'))
Example #15
def check_key_columns(workflow: models.Workflow):
    """Check that key columns maintain their property.

    Function used to verify that after changes in the DB the key columns
    maintain their property.

    :param workflow: Object to use for the verification.
    :return: Nothing. Raise exception if key column lost the property.
    """
    col_name = next((col.name for col in workflow.columns.filter(is_key=True)
                     if not sql.is_column_unique(
                         workflow.get_data_frame_table_name(), col.name)),
                    None)
    if col_name:
        raise Exception(
            _('The new data does not preserve the key property of '
              'column "{0}"').format(col_name))
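sql.is_column_unique is not shown; a direct SQL formulation of the same
test compares total and distinct counts. A sketch assuming a
psycopg2-style connection:

def is_column_unique(connection, table_name, col_name):
    # A column keeps the key property when no value repeats:
    # COUNT(col) equals COUNT(DISTINCT col) (NULLs excluded by both).
    with connection.cursor() as cursor:
        cursor.execute(
            'SELECT COUNT("{0}") = COUNT(DISTINCT "{0}") '
            'FROM "{1}"'.format(col_name, table_name))
        return cursor.fetchone()[0]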
Example #16
def do_workflow_update_lusers(workflow: Workflow, log_item: Log):
    """Recalculate the field lusers.

    Recalculate the elements in the field lusers of the workflow based on
    the fields luser_email_column and luser_email_column_md5.

    :param workflow: Workflow to update

    :param log_item: Log where to leave the status of the operation

    :return: Nothing. The lusers many-to-many relation is updated.
    """
    # Get the column content
    emails = get_rows(workflow.get_data_frame_table_name(),
                      column_names=[workflow.luser_email_column.name])

    luser_list = []
    created = 0
    for row in emails:
        uemail = row[workflow.luser_email_column.name]
        luser = get_user_model().objects.filter(email=uemail).first()
        if not luser:
            # Create user
            if settings.DEBUG:
                # Define users with the same password in development
                password = '******'  # NOQA
            else:
                password = get_random_string(length=RANDOM_PWD_LENGTH)
            luser = get_user_model().objects.create_user(
                email=uemail,
                password=password,
            )
            created += 1

        luser_list.append(luser)

    # Assign result
    workflow.lusers.set(luser_list)

    # Report status
    log_item.payload['total_users'] = emails.rowcount
    log_item.payload['new_users'] = created
    log_item.payload['status'] = ugettext(
        'Learner emails successfully updated.', )
    log_item.save()
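The loop is a fetch-or-create keyed on email; get_or_create is avoided,
presumably because create_user must hash the password. The same logic in
isolation (a sketch assuming an email-based user model; the password
callable is an assumption):

from django.contrib.auth import get_user_model

def fetch_or_create_by_email(email, make_password):
    # make_password is called lazily so a random string is only
    # generated when a new account is actually created.
    user = get_user_model().objects.filter(email=email).first()
    if user is None:
        user = get_user_model().objects.create_user(
            email=email, password=make_password())
    return user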
Example #17
def create_survey_table(
    workflow: models.Workflow,
    action: models.Action,
    dt_page: DataTablesServerSidePaging,
) -> http.JsonResponse:
    """Create the table with the survey entries for instructor.

    :param workflow: Workflow being processed
    :param action: Action representing the survey
    :param dt_page: Data tables server side paging object
    :return: JSON response with the survey table data
    """
    columns = [ccpair.column for ccpair in action.column_condition_pair.all()]
    query_set = _create_initial_qs(
        workflow.get_data_frame_table_name(),
        action.get_filter_formula(),
        columns,
        dt_page,
    )

    filtered = len(query_set)

    # Get the subset of the qs to show in the table
    query_set = _create_table_qsdata(
        action.id,
        query_set,
        dt_page,
        columns,
        next(idx for idx, col in enumerate(columns) if col.is_key),
    )

    return http.JsonResponse({
        'draw': dt_page.draw,
        'recordsTotal': workflow.nrows,
        'recordsFiltered': filtered,
        'data': query_set,
    })
Example #18
def batch_load_df_from_athenaconnection(
    workflow: models.Workflow,
    conn: models.AthenaConnection,
    run_params: Dict,
    log_item: models.Log,
):
    """Batch load a DF from an Athena connection.

    run_params is expected to contain:
        aws_secret_access_key: Optional[str] = None
        aws_session_token: Optional[str] = None
        table_name: Optional[str] = None
        merge_key: Optional[str] = None
        merge_method: Optional[str] = None

    from pyathena import connect
    from pyathena.pandas_cursor import PandasCursor

    cursor = connect(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
        s3_staging_dir=staging_dir,
        region_name=region_name)

    df = pd.read_sql('SELECT * FROM given_table_name', cursor)
    print(df.describe())
    print(df.head())

    :param workflow: Workflow to store the new data
    :param conn: AthenaConnection object with the connection parameters.
    :param run_params: Dictionary with additional connection parameters
    :param log_item: Log object to reflect the status of the execution
    :return: Nothing.
    """
    staging_dir = 's3://{0}'.format(conn.aws_bucket_name)
    if conn.aws_file_path:
        staging_dir = staging_dir + '/' + conn.aws_file_path

    cursor = connect(aws_access_key_id=conn.aws_access_key,
                     aws_secret_access_key=run_params['aws_secret_access_key'],
                     aws_session_token=run_params['aws_session_token'],
                     s3_staging_dir=staging_dir,
                     region_name=conn.aws_region_name)

    data_frame = pd.read_sql(
        'SELECT * FROM {0}'.format(run_params['table_name']), cursor)

    # Strip white space from all string columns and try to convert to
    # datetime just in case
    data_frame = pandas.detect_datetime_columns(data_frame)

    pandas.verify_data_frame(data_frame)

    col_names, col_types, is_key = pandas.store_temporary_dataframe(
        data_frame, workflow)

    upload_data = {
        'initial_column_names': col_names,
        'col_types': col_types,
        'src_is_key_column': is_key,
        'rename_column_names': col_names[:],
        'columns_to_upload': [True] * len(col_names),
        'keep_key_column': is_key[:]
    }

    if not workflow.has_data_frame():
        # Regular load operation
        pandas.store_workflow_table(workflow, upload_data)
        log_item.payload['col_names'] = col_names
        log_item.payload['col_types'] = col_types
        log_item.payload['column_unique'] = is_key
        log_item.payload['num_rows'] = workflow.nrows
        log_item.payload['num_cols'] = workflow.ncols
        log_item.save(update_fields=['payload'])
        return

    # Merge operation
    upload_data['dst_column_names'] = workflow.get_column_names()
    upload_data['dst_is_unique_column'] = workflow.get_column_unique()
    upload_data['dst_unique_col_names'] = [
        cname for idx, cname in enumerate(upload_data['dst_column_names'])
        if upload_data['dst_is_unique_column'][idx]
    ]
    upload_data['src_selected_key'] = run_params['merge_key']
    upload_data['dst_selected_key'] = run_params['merge_key']
    upload_data['how_merge'] = run_params['merge_method']

    dst_df = pandas.load_table(workflow.get_data_frame_table_name())
    src_df = pandas.load_table(workflow.get_data_frame_upload_table_name())

    try:
        pandas.perform_dataframe_upload_merge(workflow, dst_df, src_df,
                                              upload_data)
    except Exception as exc:
        # Nuke the temporary table
        sql.delete_table(workflow.get_data_frame_upload_table_name())
        raise Exception(
            _('Unable to perform merge operation: {0}').format(str(exc)))

    col_names, col_types, is_key = workflow.get_column_info()
    log_item.payload['col_names'] = col_names
    log_item.payload['col_types'] = col_types
    log_item.payload['column_unique'] = is_key
    log_item.payload['num_rows'] = workflow.nrows
    log_item.payload['num_cols'] = workflow.ncols
    log_item.save(update_fields=['payload'])
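A caller would supply run_params along these lines (all values are
placeholders, not taken from the source):

run_params = {
    'aws_secret_access_key': '<secret>',
    'aws_session_token': '<token>',
    'table_name': 'student_activity',
    'merge_key': 'email',
    'merge_method': 'outer',
}
batch_load_df_from_athenaconnection(workflow, conn, run_params, log_item)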
Example #19
    def create(self, validated_data, **kwargs):
        """Create the new workflow."""
        wflow_name = self.context.get('name')
        if not wflow_name:
            wflow_name = self.validated_data.get('name')
            if not wflow_name:
                raise Exception(_('Unexpected empty workflow name.'))

            if Workflow.objects.filter(name=wflow_name,
                                       user=self.context['user']).exists():
                raise Exception(
                    _('There is a workflow with this name. ' +
                      'Please provide a workflow name in the import page.'))

        # Initial values
        workflow_obj = None
        try:
            workflow_obj = Workflow(
                user=self.context['user'],
                name=wflow_name,
                description_text=validated_data['description_text'],
                nrows=0,
                ncols=0,
                attributes=validated_data['attributes'],
                query_builder_ops=validated_data.get('query_builder_ops', {}),
            )
            workflow_obj.save()

            # Create the columns
            column_data = ColumnSerializer(data=validated_data.get(
                'columns', []),
                                           many=True,
                                           context={'workflow': workflow_obj})
            # And save its content
            if column_data.is_valid():
                columns = column_data.save()
            else:
                raise Exception(_('Unable to save column information'))

            # If there is any column with position = 0, recompute (this is to
            # guarantee backward compatibility).
            if any(col.position == 0 for col in columns):
                for idx, col in enumerate(columns):
                    col.position = idx + 1
                    col.save()

            # Load the data frame
            data_frame = validated_data.get('data_frame')
            if data_frame is not None:
                # Store the table in the DB
                store_table(
                    data_frame,
                    workflow_obj.get_data_frame_table_name(),
                    dtype={
                        col.name: col.data_type
                        for col in workflow_obj.columns.all()
                    },
                )

                # Reconcile now the information in workflow and columns with
                # the one loaded
                workflow_obj.ncols = validated_data['ncols']
                workflow_obj.nrows = validated_data['nrows']

                workflow_obj.save()

            # Create the actions pointing to the workflow
            action_data = ActionSerializer(data=validated_data.get(
                'actions', []),
                                           many=True,
                                           context={
                                               'workflow': workflow_obj,
                                               'columns': columns
                                           })

            if action_data.is_valid():
                action_data.save()
            else:
                raise Exception(_('Unable to save action information'))

            # Create the views pointing to the workflow
            view_data = ViewSerializer(data=validated_data.get('views', []),
                                       many=True,
                                       context={
                                           'workflow': workflow_obj,
                                           'columns': columns
                                       })

            if view_data.is_valid():
                view_data.save()
            else:
                raise Exception(_('Unable to save view information'))
        except Exception:
            # Get rid of the objects created
            if workflow_obj and workflow_obj.id:
                workflow_obj.delete()
            raise

        return workflow_obj
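An import endpoint would feed this serializer the parsed JSON plus the
user and name in the context; a sketch (the serializer class name and
surrounding variables are assumptions):

# Hypothetical usage in an import view.
serializer = WorkflowSerializer(
    data=parsed_json,
    context={'user': request.user, 'name': new_name})
if serializer.is_valid():
    workflow = serializer.save()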
Example #20
def delete_column(
    user: get_user_model(),
    workflow: models.Workflow,
    column: models.Column,
    cond_to_delete: Optional[List[models.Condition]] = None,
):
    """Remove column from ontask.workflow.

    Given a workflow and a column, removes it from the workflow (and the
    corresponding data frame).

    :param user: User performing the operation
    :param workflow: Workflow object
    :param column: Column object to delete
    :param cond_to_delete: List of conditions to delete after removing the
    column
    :return: Nothing. Effect reflected in the database
    """
    column.log(user, models.Log.COLUMN_DELETE)

    # Drop the column from the DB table storing the data frame
    sql.df_drop_column(workflow.get_data_frame_table_name(), column.name)

    # Reposition the columns above the one being deleted
    workflow.reposition_columns(column.position, workflow.ncols + 1)

    # Delete the column
    column.delete()

    # Update the information in the workflow
    workflow.ncols = workflow.ncols - 1
    workflow.save()

    if not cond_to_delete:
        # The conditions to delete are not given, so calculate them
        # Get the conditions/actions attached to this workflow
        cond_to_delete = [
            cond for cond in models.Condition.objects.filter(
                action__workflow=workflow) if column in cond.columns.all()
        ]

    # If a column disappears, the conditions that contain that variable
    # are removed
    actions_without_filters = []
    for condition in cond_to_delete:
        if condition.is_filter:
            actions_without_filters.append(condition.action)

        # Formula has the name of the deleted column. Delete it
        condition.delete()

    # Traverse the actions for which the filter has been deleted and reassess
    #  all their conditions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in actions_without_filters:
        act.update_n_rows_selected()

    # If the deleted column was the only one in a view, the view no longer
    # shows anything relevant and is removed as well.
    for view in workflow.views.all():
        if view.columns.count() == 0:
            view.delete()
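A hedged call-site sketch (the column name is illustrative):

# Delete a column and let the service drop dependent conditions and
# any views left without columns.
column = workflow.columns.get(name='midterm_score')
delete_column(request.user, workflow, column)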
Example #21
def get_table_visualization_items(
    workflow: models.Workflow,
    rowselect_key: Optional[str] = None,
    rowselect_val: Optional[Any] = None,
    pk: Optional[int] = None,
) -> Optional[Tuple[str, Dict]]:
    """Get a tuple with a template, and a dictionary to visualize a table.

    :param workflow: Workflow being processed
    :param rowselect_key: Optional key name to select a row
    :param rowselect_val: Optional value to select a row
    :param pk: Primary key of a view (could be none)
    :return: Tuple (template name, context dictionary); None on error
    """
    # Get the data frame and the columns
    df_col_view = _get_df_and_columns(workflow, pk)
    if not df_col_view:
        return None
    df, columns_to_view, view = df_col_view

    if rowselect_key:
        template = 'table/stat_row.html'
        row = sql.get_row(
            workflow.get_data_frame_table_name(),
            rowselect_key,
            rowselect_val,
            column_names=[col.name for col in columns_to_view],
        )
    else:
        row = None
        template = 'table/stat_view.html'

    vis_scripts = []
    visualizations = []
    context = {
        'style': 'max-width:{0}px; max-height:{1}px; margin: auto;'.format(
            VISUALIZATION_WIDTH,
            VISUALIZATION_HEIGHT),
    }
    for idx, column in enumerate(columns_to_view):

        # Add the title and surrounding container
        visualizations.append(
            '<hr/><h4 class="text-center">' + column.name + '</h4>')
        # If all values are empty, no need to proceed
        if all(not col_data for col_data in df[column.name]):
            visualizations.append(
                '<p>' + _('No values in this column') + '</p>')
            continue

        if row and not row[column.name]:
            visualizations.append(
                '<p class="alert-warning">' + _('No value in this column')
                + '</p>',
            )

        column_viz = _get_column_visualisations(
            column,
            df[[column.name]],
            vis_scripts=vis_scripts,
            viz_id='column_{0}'.format(idx),
            single_val=row[column.name] if row else None,
            context=context)

        visualizations.extend([vis.html_content for vis in column_viz])

    return template, {
        'value': rowselect_val,
        'view': view,
        'vis_scripts': vis_scripts,
        'visualizations': visualizations}
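A caller unpacks the template/context pair and renders it; a sketch
(view wiring and URL name are assumptions):

from django.shortcuts import redirect, render

def stat_table(request, workflow, view_pk=None):
    # Hypothetical view using get_table_visualization_items.
    pair = get_table_visualization_items(workflow, pk=view_pk)
    if pair is None:
        return redirect('table:display')
    template, context = pair
    return render(request, template, context)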