Esempio n. 1
0
def delete_column(
    user: get_user_model(),
    workflow: models.Workflow,
    column: models.Column,
    cond_to_delete: Optional[List[models.Condition]] = None,
):
    """Remove column from ontask.workflow.

    Given a workflow and a column, removes it from the workflow (and the
    corresponding data frame

    :param user: User performing the operation
    :param workflow: Workflow object
    :param column: Column object to delete
    :param cond_to_delete: List of conditions to delete after removing the
    column
    :return: Nothing. Effect reflected in the database
    """
    column.log(user, models.Log.COLUMN_DELETE)

    # Drop the column from the DB table storing the data frame
    sql.df_drop_column(workflow.get_data_frame_table_name(), column.name)

    # Reposition the columns above the one being deleted
    workflow.reposition_columns(column.position, workflow.ncols + 1)

    # Delete the column
    column.delete()

    # Update the information in the workflow
    workflow.ncols = workflow.ncols - 1
    workflow.save()

    if not cond_to_delete:
        # The conditions to delete are not given, so calculate them
        # Get the conditions/actions attached to this workflow
        cond_to_delete = [
            cond for cond in models.Condition.objects.filter(
                action__workflow=workflow)
            if column in cond.columns.all()]

    # If a column disappears, the conditions that contain that variable
    # are removed
    actions_without_filters = []
    for condition in cond_to_delete:
        if condition.is_filter:
            actions_without_filters.append(condition.action)

        # Formula has the name of the deleted column. Delete it
        condition.delete()

    # Traverse the actions for which the filter has been deleted and reassess
    #  all their conditions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in actions_without_filters:
        act.update_n_rows_selected()

    # If a column disappears, the views that contain only that column need to
    # disappear as well as they are no longer relevant.
    for view in workflow.views.all():
        if view.columns.count() == 0:
            view.delete()
Esempio n. 2
0
def batch_load_df_from_athenaconnection(
    workflow: models.Workflow,
    conn: models.AthenaConnection,
    run_params: Dict,
    log_item: models.Log,
):
    """Batch load a DF from an Athena connection.

    run_params has:
    aws_secret_access_key: Optional[str] = None,
    aws_session_token: Optional[str] = None,
    table_name: Optional[str] = None
    key_column_name[str] = None
    merge_method[str] = None

    from pyathena import connect
    from pyathena.pandas_cursor import PandasCursor

    cursor = connect(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token,
        s3_staging_dir=staging_dir,
        region_name=region_name)

    df = pd.read_sql('SELECT * FROM given_table_name', cursor)
    print(df.describe())
    print(df.head())

    :param workflow: Workflow to store the new data
    :param conn: AthenaConnection object with the connection parameters.
    :param run_params: Dictionary with additional connection parameters
    :param log_item: Log object to reflect the status of the execution
    :return: Nothing.
    """
    staging_dir = 's3://{0}'.format(conn.aws_bucket_name)
    if conn.aws_file_path:
        staging_dir = staging_dir + '/' + conn.aws_file_path

    cursor = connect(aws_access_key_id=conn.aws_access_key,
                     aws_secret_access_key=run_params['aws_secret_access_key'],
                     aws_session_token=run_params['aws_session_token'],
                     s3_staging_dir=staging_dir,
                     region_name=conn.aws_region_name)

    data_frame = pd.read_sql(
        'SELECT * FROM {0}'.format(run_params['table_name']), cursor)

    # Strip white space from all string columns and try to convert to
    # datetime just in case
    data_frame = process_object_column(data_frame)

    pandas.verify_data_frame(data_frame)

    col_names, col_types, is_key = pandas.store_temporary_dataframe(
        data_frame, workflow)

    upload_data = {
        'initial_column_names': col_names,
        'col_types': col_types,
        'src_is_key_column': is_key,
        'rename_column_names': col_names[:],
        'columns_to_upload': [True] * len(col_names),
        'keep_key_column': is_key[:]
    }

    if not workflow.has_data_frame():
        # Regular load operation
        pandas.store_workflow_table(workflow, upload_data)
        log_item.payload['col_names'] = col_names
        log_item.payload['col_types'] = col_types
        log_item.payload['column_unique'] = is_key
        log_item.payload['num_rows'] = workflow.nrows
        log_item.payload['num_cols'] = workflow.ncols
        log_item.save()
        return

    # Merge operation
    upload_data['dst_column_names'] = workflow.get_column_names()
    upload_data['dst_is_unique_column'] = workflow.get_column_unique()
    upload_data['dst_unique_col_names'] = [
        cname for idx, cname in enumerate(upload_data['dst_column_names'])
        if upload_data['dst_column_names'][idx]
    ]
    upload_data['src_selected_key'] = run_params['merge_key']
    upload_data['dst_selected_key'] = run_params['merge_key']
    upload_data['how_merge'] = run_params['merge_method']

    dst_df = pandas.load_table(workflow.get_data_frame_table_name())
    src_df = pandas.load_table(workflow.get_data_frame_upload_table_name())

    try:
        pandas.perform_dataframe_upload_merge(workflow, dst_df, src_df,
                                              upload_data)
    except Exception as exc:
        # Nuke the temporary table
        sql.delete_table(workflow.get_data_frame_upload_table_name())
        raise Exception(
            _('Unable to perform merge operation: {0}').format(str(exc)))

    col_names, col_types, is_key = workflow.get_column_info()
    log_item.payload['col_names'] = col_names
    log_item.payload['col_types'] = col_types
    log_item.payload['column_unique'] = is_key
    log_item.payload['num_rows'] = workflow.nrows
    log_item.payload['num_cols'] = workflow.ncols
    log_item.save()
Esempio n. 3
0
    def create(self, validated_data, **kwargs):
        """Create the new workflow."""
        wflow_name = self.context.get('name')
        if not wflow_name:
            wflow_name = self.validated_data.get('name')
            if not wflow_name:
                raise Exception(_('Unexpected empty workflow name.'))

            if Workflow.objects.filter(name=wflow_name,
                                       user=self.context['user']).exists():
                raise Exception(
                    _('There is a workflow with this name. ' +
                      'Please provide a workflow name in the import page.'))

        # Initial values
        workflow_obj = None
        try:
            workflow_obj = Workflow(
                user=self.context['user'],
                name=wflow_name,
                description_text=validated_data['description_text'],
                nrows=0,
                ncols=0,
                attributes=validated_data['attributes'],
                query_builder_ops=validated_data.get('query_builder_ops', {}),
            )
            workflow_obj.save()

            # Create the columns
            column_data = ColumnSerializer(data=validated_data.get(
                'columns', []),
                                           many=True,
                                           context={'workflow': workflow_obj})
            # And save its content
            if column_data.is_valid():
                columns = column_data.save()
            else:
                raise Exception(_('Unable to save column information'))

            # If there is any column with position = 0, recompute (this is to
            # guarantee backward compatibility.
            if any(col.position == 0 for col in columns):
                for idx, col in enumerate(columns):
                    col.position = idx + 1
                    col.save()

            # Load the data frame
            data_frame = validated_data.get('data_frame')
            if data_frame is not None:
                # Store the table in the DB
                store_table(
                    data_frame,
                    workflow_obj.get_data_frame_table_name(),
                    dtype={
                        col.name: col.data_type
                        for col in workflow_obj.columns.all()
                    },
                )

                # Reconcile now the information in workflow and columns with
                # the one loaded
                workflow_obj.ncols = validated_data['ncols']
                workflow_obj.nrows = validated_data['nrows']

                workflow_obj.save()

            # Create the actions pointing to the workflow
            action_data = ActionSerializer(data=validated_data.get(
                'actions', []),
                                           many=True,
                                           context={
                                               'workflow': workflow_obj,
                                               'columns': columns
                                           })

            if action_data.is_valid():
                action_data.save()
            else:
                raise Exception(_('Unable to save column information'))

            # Create the views pointing to the workflow
            view_data = ViewSerializer(data=validated_data.get('views', []),
                                       many=True,
                                       context={
                                           'workflow': workflow_obj,
                                           'columns': columns
                                       })

            if view_data.is_valid():
                view_data.save()
            else:
                raise Exception(_('Unable to save column information'))
        except Exception:
            # Get rid of the objects created
            if workflow_obj:
                if workflow_obj.id:
                    workflow_obj.delete()
            raise

        return workflow_obj
def edit_action_out(
    request: HttpRequest,
    workflow: Workflow,
    action: Action,
) -> HttpResponse:
    """Edit action out.

    :param request: Request object
    :param workflow: The workflow with the action
    :param action: Action
    :return: HTML response
    """
    # Create the form
    form = EditActionOutForm(request.POST or None, instance=action)

    form_filter = FilterForm(request.POST or None,
                             instance=action.get_filter(),
                             action=action)

    # Processing the request after receiving the text from the editor
    if request.method == 'POST' and form.is_valid() and form_filter.is_valid():
        # Get content
        text_content = form.cleaned_data.get('text_content')

        # Render the content as a template and catch potential problems.
        if _text_renders_correctly(text_content, action, form):
            # Log the event
            Log.objects.register(
                request.user, Log.ACTION_UPDATE, action.workflow, {
                    'id': action.id,
                    'name': action.name,
                    'workflow_id': workflow.id,
                    'workflow_name': workflow.name,
                    'content': text_content
                })

            # Text is good. Update the content of the action
            action.set_text_content(text_content)

            # If it is a JSON action, store the target_url
            if (action.action_type == Action.personalized_json
                    or action.action_type == Action.send_list_json):
                # Update the target_url field
                action.target_url = form.cleaned_data['target_url']

            action.save()

            if request.POST['Submit'] == 'Submit':
                return redirect(request.get_full_path())

            return redirect('action:index')

    # This is a GET request or a faulty POST request

    # Get the filter or None
    filter_condition = action.get_filter()

    # Context to render the form
    context = {
        'filter_condition':
        filter_condition,
        'action':
        action,
        'load_summernote': (action.action_type == Action.personalized_text
                            or action.action_type == Action.send_list),
        'conditions':
        action.conditions.filter(is_filter=False),
        'other_conditions':
        Condition.objects.filter(
            action__workflow=workflow,
            is_filter=False,
        ).exclude(action=action),
        'query_builder_ops':
        workflow.get_query_builder_ops_as_str(),
        'attribute_names': [attr for attr in list(workflow.attributes.keys())],
        'columns':
        workflow.columns.all(),
        'stat_columns':
        workflow.columns.filter(is_key=False),
        'selected_rows':
        filter_condition.n_rows_selected if filter_condition else -1,
        'has_data':
        action.workflow.has_table(),
        'is_send_list': (action.action_type == Action.send_list
                         or action.action_type == Action.send_list_json),
        'all_false_conditions':
        any(cond.n_rows_selected == 0 for cond in action.conditions.all()),
        'rows_all_false':
        action.get_row_all_false_count(),
        'total_rows':
        workflow.nrows,
        'form':
        form,
        'form_filter':
        form_filter,
        'vis_scripts':
        PlotlyHandler.get_engine_scripts(),
    }

    # Return the same form in the same page
    return render(request, 'action/edit_out.html', context=context)
def _do_clone_workflow(workflow: Workflow) -> Workflow:
    """Clone the workflow.

    :param workflow: source workflow

    :return: Clone object
    """
    new_workflow = Workflow(
        user=workflow.user,
        name=create_new_name(
            workflow.name,
            Workflow.objects.filter(
                Q(user=workflow.user) | Q(shared=workflow.user)),
        ),
        description_text=workflow.description_text,
        nrows=workflow.nrows,
        ncols=workflow.ncols,
        attributes=copy.deepcopy(workflow.attributes),
        query_builder_ops=copy.deepcopy(workflow.query_builder_ops),
        luser_email_column_md5=workflow.luser_email_column_md5,
        lusers_is_outdated=workflow.lusers_is_outdated)
    new_workflow.save()

    try:
        new_workflow.shared.set(list(workflow.shared.all()))
        new_workflow.lusers.set(list(workflow.lusers.all()))

        # Clone the columns
        for item_obj in workflow.columns.all():
            do_clone_column_only(item_obj, new_workflow=new_workflow)

        # Update the luser_email_column if needed:
        if workflow.luser_email_column:
            new_workflow.luser_email_column = new_workflow.columns.get(
                name=workflow.luser_email_column.name, )

        # Clone the DB table
        clone_table(workflow.get_data_frame_table_name(),
                    new_workflow.get_data_frame_table_name())

        # Clone actions
        for item_obj in workflow.actions.all():
            do_clone_action(item_obj, new_workflow)

        for item_obj in workflow.views.all():
            do_clone_view(item_obj, new_workflow)

        # Done!
        new_workflow.save()
    except Exception as exc:
        new_workflow.delete()
        raise exc

    return new_workflow