def delete_column(
    user: get_user_model(),
    workflow: models.Workflow,
    column: models.Column,
    cond_to_delete: Optional[List[models.Condition]] = None,
):
    """Remove column from ontask.workflow.

    Given a workflow and a column, removes it from the workflow (and the
    corresponding data frame).

    :param user: User performing the operation
    :param workflow: Workflow object
    :param column: Column object to delete
    :param cond_to_delete: List of conditions to delete after removing the
        column
    :return: Nothing. Effect reflected in the database
    """
    column.log(user, models.Log.COLUMN_DELETE)

    # Drop the column from the DB table storing the data frame
    sql.df_drop_column(workflow.get_data_frame_table_name(), column.name)

    # Reposition the columns above the one being deleted
    workflow.reposition_columns(column.position, workflow.ncols + 1)

    # Delete the column
    column.delete()

    # Update the information in the workflow
    workflow.ncols = workflow.ncols - 1
    workflow.save()

    if not cond_to_delete:
        # The conditions to delete are not given, so calculate them.
        # Get the conditions/actions attached to this workflow.
        cond_to_delete = [
            cond for cond in models.Condition.objects.filter(
                action__workflow=workflow)
            if column in cond.columns.all()]

    # If a column disappears, the conditions that contain that variable
    # are removed
    actions_without_filters = []
    for condition in cond_to_delete:
        if condition.is_filter:
            actions_without_filters.append(condition.action)

        # Formula has the name of the deleted column. Delete it
        condition.delete()

    # Traverse the actions for which the filter has been deleted and
    # reassess all their conditions
    # TODO: Explore how to do this asynchronously (or lazy)
    for act in actions_without_filters:
        act.update_n_rows_selected()

    # If a column disappears, the views that contain only that column need
    # to disappear as well, as they are no longer relevant.
    for view in workflow.views.all():
        if view.columns.count() == 0:
            view.delete()
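
# --- Usage sketch (illustrative, not part of the module) --------------------
# A minimal example of calling delete_column from service code, assuming the
# models referenced above; the workflow name 'marks' and column name
# 'midterm' are hypothetical placeholders.
def _example_delete_column(user):
    workflow = models.Workflow.objects.get(user=user, name='marks')
    column = workflow.columns.get(name='midterm')
    # Omitting cond_to_delete makes the function recompute the affected
    # conditions by scanning every condition in the workflow's actions.
    delete_column(user, workflow, column)
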
def batch_load_df_from_athenaconnection(
    workflow: models.Workflow,
    conn: models.AthenaConnection,
    run_params: Dict,
    log_item: models.Log,
):
    """Batch load a DF from an Athena connection.

    run_params has:
        aws_secret_access_key: Optional[str] = None
        aws_session_token: Optional[str] = None
        table_name: Optional[str] = None
        key_column_name: Optional[str] = None
        merge_method: Optional[str] = None

    Example of the underlying pyathena access:

        from pyathena import connect
        from pyathena.pandas_cursor import PandasCursor

        cursor = connect(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_session_token=aws_session_token,
            s3_staging_dir=staging_dir,
            region_name=region_name)

        df = pd.read_sql('SELECT * FROM given_table_name', cursor)
        print(df.describe())
        print(df.head())

    :param workflow: Workflow to store the new data
    :param conn: AthenaConnection object with the connection parameters
    :param run_params: Dictionary with additional connection parameters
    :param log_item: Log object to reflect the status of the execution
    :return: Nothing.
    """
    staging_dir = 's3://{0}'.format(conn.aws_bucket_name)
    if conn.aws_file_path:
        staging_dir = staging_dir + '/' + conn.aws_file_path

    cursor = connect(
        aws_access_key_id=conn.aws_access_key,
        aws_secret_access_key=run_params['aws_secret_access_key'],
        aws_session_token=run_params['aws_session_token'],
        s3_staging_dir=staging_dir,
        region_name=conn.aws_region_name)

    data_frame = pd.read_sql(
        'SELECT * FROM {0}'.format(run_params['table_name']),
        cursor)

    # Strip white space from all string columns and try to convert to
    # datetime just in case
    data_frame = process_object_column(data_frame)

    # NB: 'pandas' below is the project's helper module; the pandas library
    # itself is imported as 'pd'.
    pandas.verify_data_frame(data_frame)

    col_names, col_types, is_key = pandas.store_temporary_dataframe(
        data_frame,
        workflow)

    upload_data = {
        'initial_column_names': col_names,
        'col_types': col_types,
        'src_is_key_column': is_key,
        'rename_column_names': col_names[:],
        'columns_to_upload': [True] * len(col_names),
        'keep_key_column': is_key[:]}

    if not workflow.has_data_frame():
        # Regular load operation
        pandas.store_workflow_table(workflow, upload_data)
        log_item.payload['col_names'] = col_names
        log_item.payload['col_types'] = col_types
        log_item.payload['column_unique'] = is_key
        log_item.payload['num_rows'] = workflow.nrows
        log_item.payload['num_cols'] = workflow.ncols
        log_item.save()
        return

    # Merge operation
    upload_data['dst_column_names'] = workflow.get_column_names()
    upload_data['dst_is_unique_column'] = workflow.get_column_unique()
    # Keep only the names of the key columns (the original filtered on the
    # truthiness of the column name, which kept every column).
    upload_data['dst_unique_col_names'] = [
        cname for idx, cname in enumerate(upload_data['dst_column_names'])
        if upload_data['dst_is_unique_column'][idx]]
    upload_data['src_selected_key'] = run_params['merge_key']
    upload_data['dst_selected_key'] = run_params['merge_key']
    upload_data['how_merge'] = run_params['merge_method']

    dst_df = pandas.load_table(workflow.get_data_frame_table_name())
    src_df = pandas.load_table(workflow.get_data_frame_upload_table_name())

    try:
        pandas.perform_dataframe_upload_merge(
            workflow,
            dst_df,
            src_df,
            upload_data)
    except Exception as exc:
        # Nuke the temporary table
        sql.delete_table(workflow.get_data_frame_upload_table_name())
        raise Exception(
            _('Unable to perform merge operation: {0}').format(str(exc)))

    col_names, col_types, is_key = workflow.get_column_info()
    log_item.payload['col_names'] = col_names
    log_item.payload['col_types'] = col_types
    log_item.payload['column_unique'] = is_key
    log_item.payload['num_rows'] = workflow.nrows
    log_item.payload['num_cols'] = workflow.ncols
    log_item.save()
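
# --- Usage sketch (illustrative, not part of the module) --------------------
# Assembling run_params for a merge load, using only the keys the function
# above reads; the credential values, table name and column names are
# hypothetical.
def _example_athena_merge(workflow, conn, log_item):
    run_params = {
        'aws_secret_access_key': 'SECRET',   # hypothetical credential
        'aws_session_token': None,           # optional STS session token
        'table_name': 'student_records',     # hypothetical Athena table
        'merge_key': 'email',                # key column on both sides
        'merge_method': 'outer',             # how to merge src into dst
    }
    batch_load_df_from_athenaconnection(workflow, conn, run_params, log_item)
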
def create(self, validated_data, **kwargs):
    """Create the new workflow."""
    wflow_name = self.context.get('name')
    if not wflow_name:
        wflow_name = self.validated_data.get('name')
        if not wflow_name:
            raise Exception(_('Unexpected empty workflow name.'))

        if Workflow.objects.filter(
            name=wflow_name,
            user=self.context['user'],
        ).exists():
            raise Exception(_(
                'There is a workflow with this name. '
                + 'Please provide a workflow name in the import page.'))

    # Initial values
    workflow_obj = None
    try:
        workflow_obj = Workflow(
            user=self.context['user'],
            name=wflow_name,
            description_text=validated_data['description_text'],
            nrows=0,
            ncols=0,
            attributes=validated_data['attributes'],
            query_builder_ops=validated_data.get('query_builder_ops', {}),
        )
        workflow_obj.save()

        # Create the columns
        column_data = ColumnSerializer(
            data=validated_data.get('columns', []),
            many=True,
            context={'workflow': workflow_obj})

        # And save its content
        if column_data.is_valid():
            columns = column_data.save()
        else:
            raise Exception(_('Unable to save column information'))

        # If there is any column with position = 0, recompute (this is to
        # guarantee backward compatibility).
        if any(col.position == 0 for col in columns):
            for idx, col in enumerate(columns):
                col.position = idx + 1
                col.save()

        # Load the data frame
        data_frame = validated_data.get('data_frame')
        if data_frame is not None:
            # Store the table in the DB
            store_table(
                data_frame,
                workflow_obj.get_data_frame_table_name(),
                dtype={
                    col.name: col.data_type
                    for col in workflow_obj.columns.all()},
            )

            # Reconcile now the information in workflow and columns with
            # the one loaded
            workflow_obj.ncols = validated_data['ncols']
            workflow_obj.nrows = validated_data['nrows']
            workflow_obj.save()

        # Create the actions pointing to the workflow
        action_data = ActionSerializer(
            data=validated_data.get('actions', []),
            many=True,
            context={'workflow': workflow_obj, 'columns': columns})

        if action_data.is_valid():
            action_data.save()
        else:
            raise Exception(_('Unable to save action information'))

        # Create the views pointing to the workflow
        view_data = ViewSerializer(
            data=validated_data.get('views', []),
            many=True,
            context={'workflow': workflow_obj, 'columns': columns})

        if view_data.is_valid():
            view_data.save()
        else:
            raise Exception(_('Unable to save view information'))
    except Exception:
        # Get rid of the objects created
        if workflow_obj:
            if workflow_obj.id:
                workflow_obj.delete()
        raise

    return workflow_obj
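
# --- Usage sketch (illustrative, not part of the module) --------------------
# How create() is normally reached: the standard DRF validate-then-save flow.
# The enclosing serializer class name ('WorkflowSerializer') is an assumption,
# as is the caller; passing 'name': None in the context makes create() fall
# back to the name inside the payload.
def _example_import_workflow(json_payload, user):
    serializer = WorkflowSerializer(
        data=json_payload,
        context={'user': user, 'name': None})
    if not serializer.is_valid():
        raise ValueError(serializer.errors)
    return serializer.save()  # invokes create() above with validated_data
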
def edit_action_out(
    request: HttpRequest,
    workflow: Workflow,
    action: Action,
) -> HttpResponse:
    """Edit action out.

    :param request: Request object
    :param workflow: The workflow with the action
    :param action: Action
    :return: HTML response
    """
    # Create the form
    form = EditActionOutForm(request.POST or None, instance=action)

    form_filter = FilterForm(
        request.POST or None,
        instance=action.get_filter(),
        action=action)

    # Processing the request after receiving the text from the editor
    if (
        request.method == 'POST'
        and form.is_valid()
        and form_filter.is_valid()
    ):
        # Get content
        text_content = form.cleaned_data.get('text_content')

        # Render the content as a template and catch potential problems.
        if _text_renders_correctly(text_content, action, form):
            # Log the event
            Log.objects.register(
                request.user,
                Log.ACTION_UPDATE,
                action.workflow,
                {'id': action.id,
                 'name': action.name,
                 'workflow_id': workflow.id,
                 'workflow_name': workflow.name,
                 'content': text_content})

            # Text is good. Update the content of the action
            action.set_text_content(text_content)

            # If it is a JSON action, store the target_url
            if (
                action.action_type == Action.personalized_json
                or action.action_type == Action.send_list_json
            ):
                # Update the target_url field
                action.target_url = form.cleaned_data['target_url']

            action.save()

            if request.POST['Submit'] == 'Submit':
                return redirect(request.get_full_path())

            return redirect('action:index')

    # This is a GET request or a faulty POST request

    # Get the filter or None
    filter_condition = action.get_filter()

    # Context to render the form
    context = {
        'filter_condition': filter_condition,
        'action': action,
        'load_summernote': (
            action.action_type == Action.personalized_text
            or action.action_type == Action.send_list),
        'conditions': action.conditions.filter(is_filter=False),
        'other_conditions': Condition.objects.filter(
            action__workflow=workflow,
            is_filter=False,
        ).exclude(action=action),
        'query_builder_ops': workflow.get_query_builder_ops_as_str(),
        'attribute_names': list(workflow.attributes.keys()),
        'columns': workflow.columns.all(),
        'stat_columns': workflow.columns.filter(is_key=False),
        'selected_rows':
            filter_condition.n_rows_selected if filter_condition else -1,
        'has_data': action.workflow.has_table(),
        'is_send_list': (
            action.action_type == Action.send_list
            or action.action_type == Action.send_list_json),
        'all_false_conditions': any(
            cond.n_rows_selected == 0
            for cond in action.conditions.all()),
        'rows_all_false': action.get_row_all_false_count(),
        'total_rows': workflow.nrows,
        'form': form,
        'form_filter': form_filter,
        'vis_scripts': PlotlyHandler.get_engine_scripts(),
    }

    # Return the same form in the same page
    return render(request, 'action/edit_out.html', context=context)
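
# --- Usage sketch (illustrative, not part of the module) --------------------
# Exercising edit_action_out with Django's test client. The URL name
# 'action:edit' and the 'pk' kwarg are assumptions about the routing; the
# POSTed fields match what the view reads above ('text_content' and the
# 'Submit' button value that keeps the editor open).
def _example_edit_post(client, action):
    from django.urls import reverse  # local import keeps the sketch self-contained
    return client.post(
        reverse('action:edit', kwargs={'pk': action.id}),
        {'text_content': '<p>Hello {{ name }}</p>', 'Submit': 'Submit'})
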
def _do_clone_workflow(workflow: Workflow) -> Workflow:
    """Clone the workflow.

    :param workflow: source workflow
    :return: Clone object
    """
    new_workflow = Workflow(
        user=workflow.user,
        name=create_new_name(
            workflow.name,
            Workflow.objects.filter(
                Q(user=workflow.user) | Q(shared=workflow.user)),
        ),
        description_text=workflow.description_text,
        nrows=workflow.nrows,
        ncols=workflow.ncols,
        attributes=copy.deepcopy(workflow.attributes),
        query_builder_ops=copy.deepcopy(workflow.query_builder_ops),
        luser_email_column_md5=workflow.luser_email_column_md5,
        lusers_is_outdated=workflow.lusers_is_outdated)
    new_workflow.save()

    try:
        new_workflow.shared.set(list(workflow.shared.all()))
        new_workflow.lusers.set(list(workflow.lusers.all()))

        # Clone the columns
        for item_obj in workflow.columns.all():
            do_clone_column_only(item_obj, new_workflow=new_workflow)

        # Update the luser_email_column if needed
        if workflow.luser_email_column:
            new_workflow.luser_email_column = new_workflow.columns.get(
                name=workflow.luser_email_column.name,
            )

        # Clone the DB table
        clone_table(
            workflow.get_data_frame_table_name(),
            new_workflow.get_data_frame_table_name())

        # Clone actions
        for item_obj in workflow.actions.all():
            do_clone_action(item_obj, new_workflow)

        # Clone views
        for item_obj in workflow.views.all():
            do_clone_view(item_obj, new_workflow)

        # Done!
        new_workflow.save()
    except Exception as exc:
        new_workflow.delete()
        raise exc

    return new_workflow
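
# --- Usage sketch (illustrative, not part of the module) --------------------
# _do_clone_workflow is all-or-nothing: any failure while copying columns,
# the data frame table, actions or views deletes the half-built clone and
# re-raises, so a hypothetical caller needs no cleanup of its own.
def _example_clone(workflow):
    new_workflow = _do_clone_workflow(workflow)
    # create_new_name() has already picked a name that does not collide with
    # the user's owned or shared workflows.
    return new_workflow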