def upload_s2(request):
    """
    The four-step process populates a dictionary named upload_data in the
    session (fields grouped by the step in which they are set).

    ASSUMES:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL name of the first step

    CREATES:

    rename_column_names: Modified column names to remove ambiguity when
        merging.

    columns_to_upload: Boolean list denoting the columns in SRC that are
        marked for upload.

    keep_key_column: Boolean list with those key columns that need to be kept.

    :param request: Web request
    :return: the dictionary upload_data in the session object
    """
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Get the dictionary with the upload information stored in the session
    upload_data = request.session.get('upload_data', None)
    if not upload_data:
        # If there is no object, or it is an empty dict, it denotes a direct
        # jump to this step; get back to the dataops page
        return redirect('dataops:uploadmerge')

    # Get the column names, types, and those that are unique from the data
    # frame. Use indexing (not .get) so a missing field raises KeyError.
    try:
        initial_columns = upload_data['initial_column_names']
        column_types = upload_data['column_types']
        src_is_key_column = upload_data['src_is_key_column']
    except KeyError:
        # The page has been invoked out of order
        return redirect(
            upload_data.get('step_1', reverse('dataops:uploadmerge')))

    # Get or create the list with the renamed column names
    rename_column_names = upload_data.get('rename_column_names', None)
    if rename_column_names is None:
        rename_column_names = initial_columns[:]
        upload_data['rename_column_names'] = rename_column_names

    # Get or create the list of booleans identifying columns to be uploaded
    columns_to_upload = upload_data.get('columns_to_upload', None)
    if columns_to_upload is None:
        columns_to_upload = [True] * len(initial_columns)
        upload_data['columns_to_upload'] = columns_to_upload

    # Get or create the list of booleans identifying key columns to be kept
    keep_key_column = upload_data.get('keep_key_column', None)
    if keep_key_column is None:
        keep_key_column = src_is_key_column[:]
        upload_data['keep_key_column'] = keep_key_column

    # Bind the form with the received data (remember unique columns)
    form = SelectColumnUploadForm(request.POST or None,
                                  column_names=rename_column_names,
                                  columns_to_upload=columns_to_upload,
                                  is_key=src_is_key_column,
                                  keep_key=keep_key_column)

    # Get a hold of the fields to create a list to be processed in the page
    load_fields = [f for f in form if f.name.startswith('upload_')]
    newname_fields = [f for f in form if f.name.startswith('new_name_')]
    src_key_fields = [
        form['make_key_%s' % idx] if src_is_key_column[idx] else None
        for idx in range(len(src_is_key_column))
    ]

    # Create one of the context elements for the form. Pack the lists so that
    # they can be iterated in the template
    df_info = [
        list(i) for i in zip(load_fields,
                             initial_columns,
                             newname_fields,
                             column_types,
                             src_key_fields)
    ]

    # Process the initial loading of the form and return
    if request.method != 'POST' or not form.is_valid():
        # Update the dictionary with the session information
        request.session['upload_data'] = upload_data
        context = {
            'form': form,
            'wid': workflow.id,
            'prev_step': upload_data['step_1'],
            'df_info': df_info
        }

        if not ops.workflow_id_has_table(workflow.id):
            # It is an upload, not a merge; set the next step to finish
            context['next_name'] = _('Finish')
        return render(request, 'dataops/upload_s2.html', context)

    # At this point we are processing a valid POST request.
    # Modify upload_data with the information received in the POST.
    for i in range(len(initial_columns)):
        new_name = form.cleaned_data['new_name_%s' % i]
        upload_data['rename_column_names'][i] = new_name
        upload = form.cleaned_data['upload_%s' % i]
        upload_data['columns_to_upload'][i] = upload

        if src_is_key_column[i]:
            # If the column is key, check if the user wants to keep it
            keep_key_column[i] = form.cleaned_data['make_key_%s' % i]

    # Update the dictionary with the session information
    request.session['upload_data'] = upload_data

    # Load the existing DF, or None if it doesn't exist
    existing_df = pandas_db.load_from_db(workflow.id)

    if existing_df is not None:
        # This is a merge operation, so move to Step 3
        return redirect('dataops:upload_s3')

    # This is an upload operation (not a merge): save the uploaded data frame
    # in the DB and finish.

    # Get the uploaded data frame
    try:
        data_frame = ops.load_upload_from_db(workflow.id)
    except Exception:
        return render(
            request,
            'error.html',
            {'message': _('Exception while retrieving the data frame')})

    # Update the data frame (existing_df is None here, so this is a plain
    # upload rather than a merge)
    status = ops.perform_dataframe_upload_merge(workflow.id,
                                                existing_df,
                                                data_frame,
                                                upload_data)

    if status:
        # Something went wrong. Flag it and reload
        context = {
            'form': form,
            'wid': workflow.id,
            'prev_step': upload_data['step_1'],
            'df_info': df_info
        }
        return render(request, 'dataops/upload_s2.html', context)

    # Nuke the temporary table
    pandas_db.delete_upload_table(workflow.id)

    # Log the event
    col_info = workflow.get_column_info()
    logs.ops.put(
        request.user,
        'workflow_data_upload',
        workflow,
        {
            'id': workflow.id,
            'name': workflow.name,
            'num_rows': workflow.nrows,
            'num_cols': workflow.ncols,
            'column_names': col_info[0],
            'column_types': col_info[1],
            'column_unique': col_info[2]
        })

    # Go back to show the workflow detail
    return redirect(reverse('workflow:detail', kwargs={'pk': workflow.id}))
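

# ---------------------------------------------------------------------------
# A minimal sketch (not part of the view code) of the shape upload_data has
# in the session once upload_s2 completes. All values are illustrative
# assumptions that follow the docstring above, not data from this module.
def _example_upload_data_after_s2():
    """Return an illustrative upload_data dictionary as left by upload_s2."""
    return {
        # Present before step 2 (ASSUMES)
        'initial_column_names': ['email', 'name', 'score'],
        'column_types': ['string', 'string', 'double'],
        'src_is_key_column': [True, False, False],
        'step_1': 'dataops:csvupload1',  # hypothetical URL name
        # Created by step 2 (CREATES), aligned index by index with the
        # lists above
        'rename_column_names': ['email', 'full_name', 'score'],
        'columns_to_upload': [True, True, False],
        'keep_key_column': [True, False, False],
    }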
def upload_s4(request):
    """
    Step 4: Show the user the expected effect of the merge and perform it.

    ASSUMES:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL name of the first step

    rename_column_names: Modified column names to remove ambiguity when
        merging.

    columns_to_upload: Boolean list denoting the columns in SRC that are
        marked for upload.

    dst_column_names: List of column names in destination frame

    dst_is_unique_column: Boolean list with dst columns that are unique

    dst_unique_col_names: List with the column names that are unique

    dst_selected_key: Key column name selected in DST

    src_selected_key: Key column name selected in SRC

    how_merge: How to merge. One of {left, right, outer, inner}

    :param request: Web request
    :return:
    """
    # Get the workflow we are processing
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Get the dictionary containing the information about the upload
    upload_data = request.session.get('upload_data', None)
    if not upload_data:
        # If there is no object, someone is trying to jump directly here.
        return redirect('dataops:uploadmerge')

    # Check the type of request that is being processed
    if request.method == 'POST':
        # We are processing a POST request

        # Get the dataframes to merge
        try:
            dst_df = pandas_db.load_from_db(workflow.id)
            src_df = ops.load_upload_from_db(workflow.id)
        except Exception:
            return render(request,
                          'error.html',
                          {'message': _('Exception while loading data frame')})

        # Perform the merge
        status = ops.perform_dataframe_upload_merge(workflow.id,
                                                    dst_df,
                                                    src_df,
                                                    upload_data)

        # Nuke the temporary table
        pandas_db.delete_upload_table(workflow.id)

        col_info = workflow.get_column_info()

        if status:
            logs.ops.put(
                request.user,
                'workflow_data_failedmerge',
                workflow,
                {
                    'id': workflow.id,
                    'name': workflow.name,
                    'num_rows': workflow.nrows,
                    'num_cols': workflow.ncols,
                    'column_names': col_info[0],
                    'column_types': col_info[1],
                    'column_unique': col_info[2],
                    'error_msg': status
                })

            messages.error(request,
                           _('Merge operation failed.') + ' (' + status + ')')
            return redirect(reverse('dataops:uploadmerge'))

        # Log the event
        logs.ops.put(
            request.user,
            'workflow_data_merge',
            workflow,
            {
                'id': workflow.id,
                'name': workflow.name,
                'num_rows': workflow.nrows,
                'num_cols': workflow.ncols,
                'column_names': col_info[0],
                'column_types': col_info[1],
                'column_unique': col_info[2]
            })

        # Remove the upload data from the session object
        request.session.pop('upload_data', None)

        return redirect(reverse('workflow:detail',
                                kwargs={'pk': workflow.id}))

    # We are processing a GET request

    # Create the information to include in the final report table
    dst_column_names = upload_data['dst_column_names']
    dst_selected_key = upload_data['dst_selected_key']
    src_selected_key = upload_data['src_selected_key']
    # List of final column names
    final_columns = sorted(set().union(dst_column_names,
                                       upload_data['rename_column_names']))
    # Dictionary with (new src column name: (old name, is_uploaded?))
    src_info = {
        x: (y, z) for (x, y, z) in zip(upload_data['rename_column_names'],
                                       upload_data['initial_column_names'],
                                       upload_data['columns_to_upload'])
    }

    # Create the strings to show in the table for each of the rows explaining
    # what is going to be the effect of the update operation over them.
    #
    # There are 8 cases depending on whether the column is a key column, is
    # present in DST, is present in SRC, whether SRC is being renamed, and
    # whether SRC is being loaded.
    #
    # Case 1: The column is the key column used for the merge (skip it)
    #
    # Case 2: in DST, NOT in SRC:
    #         Dst | |
    #
    # Case 3: in DST, in SRC, NOT LOADED
    #         Dst Name | <-- | Src new name (Ignored)
    #
    # Case 4: NOT in DST, in SRC, NOT LOADED
    #         | | Src new name (Ignored)
    #
    # Case 5: in DST, in SRC, loaded, no rename:
    #         Dst Name (Update) | <-- | Src name
    #
    # Case 6: in DST, in SRC, loaded, rename:
    #         Dst Name (Update) | <-- | Src new name (Renamed)
    #
    # Case 7: NOT in DST, in SRC, loaded, no rename
    #         Dst Name (New) | <-- | Src name
    #
    # Case 8: NOT in DST, in SRC, loaded, renamed
    #         Dst Name (New) | <-- | Src name (Renamed)
    info = []
    for colname in final_columns:

        # Case 1: Skip the keys
        if colname == src_selected_key or colname == dst_selected_key:
            continue

        # Case 2: Column is in DST and left untouched (no counterpart in SRC)
        if colname not in src_info.keys():
            info.append((colname, False, ''))
            continue

        # Get the old name and whether the column is going to be loaded
        old_name, to_load = src_info[colname]

        # Column is not going to be loaded anyway
        if not to_load:
            if colname in dst_column_names:
                # Case 3
                info.append((colname, False, colname + _(' (Ignored)')))
            else:
                # Case 4
                info.append(('', False, colname + _(' (Ignored)')))
            continue

        # Initial name on the dst data frame
        dst_name = colname
        # Column not present in DST, so it is a new column
        if colname not in dst_column_names:
            dst_name += _(' (New)')
        else:
            dst_name += _(' (Update)')

        src_name = colname
        if colname != old_name:
            src_name += _(' (Renamed)')

        # Cases 5 - 8
        info.append((dst_name, True, src_name))

    # Store the value in the request object and update
    request.session['upload_data'] = upload_data

    return render(request,
                  'dataops/upload_s4.html',
                  {'prev_step': reverse('dataops:upload_s3'),
                   'info': info,
                   'next_name': _('Finish')})
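

# ---------------------------------------------------------------------------
# A standalone sketch of the eight-case classification above, extracted as a
# pure function so the per-column logic can be checked without a request or
# a database. The name classify_column and its signature are illustrative
# assumptions, not part of the module API. Cases 1 and 2 (the merge keys and
# columns with no counterpart in SRC) are handled before this function would
# be called, as in the loop above.
def classify_column(colname, old_name, to_load, in_dst):
    """Return the (dst label, changed?, src label) triplet for one column.

    :param colname: final (possibly renamed) column name in SRC
    :param old_name: original column name in SRC
    :param to_load: True if the column is marked for upload
    :param in_dst: True if the column already exists in DST
    """
    if not to_load:
        # Cases 3 and 4: the column is ignored
        return (colname if in_dst else '', False, colname + ' (Ignored)')

    dst_name = colname + (' (Update)' if in_dst else ' (New)')
    src_name = colname + (' (Renamed)' if colname != old_name else '')
    # Cases 5 - 8
    return (dst_name, True, src_name)

# For example, a renamed column that already exists in DST (case 6):
# classify_column('grade', 'score', True, True)
# -> ('grade (Update)', True, 'grade (Renamed)')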
def upload_s4(request):
    """
    Step 4: Show the user the expected effect of the merge and perform it.

    ASSUMES:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL name of the first step

    rename_column_names: Modified column names to remove ambiguity when
        merging.

    columns_to_upload: Boolean list denoting the columns in SRC that are
        marked for upload.

    dst_column_names: List of column names in destination frame

    dst_is_unique_column: Boolean list with dst columns that are unique

    dst_unique_col_names: List with the column names that are unique

    dst_selected_key: Key column name selected in DST

    src_selected_key: Key column name selected in SRC

    how_merge: How to merge. One of {left, right, outer, inner}

    how_dup_columns: How to handle column overlap

    autorename_column_names: Automatically modified column names

    override_columns_names: Names of dst columns that will be overridden in
        merge

    :param request: Web request
    :return:
    """
    # Get the workflow we are processing
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Get the dictionary containing the information about the upload
    upload_data = request.session.get('upload_data', None)
    if not upload_data:
        # If there is no object, someone is trying to jump directly here.
        return redirect('dataops:list')

    # Check the type of request that is being processed
    if request.method == 'POST':
        # We are processing a POST request

        # Get the dataframes to merge
        try:
            dst_df = pandas_db.load_from_db(workflow.id)
            src_df = ops.load_upload_from_db(workflow.id)
        except Exception:
            return render(request,
                          'error.html',
                          {'message': 'Exception while loading data frame'})

        # Perform the merge
        status = ops.perform_dataframe_upload_merge(workflow.id,
                                                    dst_df,
                                                    src_df,
                                                    upload_data)

        # Nuke the temporary table
        pandas_db.delete_upload_table(workflow.id)

        col_info = workflow.get_column_info()

        if status:
            logs.ops.put(
                request.user,
                'workflow_data_failedmerge',
                workflow,
                {
                    'id': workflow.id,
                    'name': workflow.name,
                    'num_rows': workflow.nrows,
                    'num_cols': workflow.ncols,
                    'column_names': col_info[0],
                    'column_types': col_info[1],
                    'column_unique': col_info[2],
                    'error_msg': status
                })

            messages.error(request,
                           'Merge operation failed. (' + status + ')')
            return redirect(reverse('dataops:list'))

        # Log the event
        logs.ops.put(
            request.user,
            'workflow_data_merge',
            workflow,
            {
                'id': workflow.id,
                'name': workflow.name,
                'num_rows': workflow.nrows,
                'num_cols': workflow.ncols,
                'column_names': col_info[0],
                'column_types': col_info[1],
                'column_unique': col_info[2]
            })

        # Remove the upload data from the session object
        request.session.pop('upload_data', None)

        return redirect('dataops:list')

    # We are processing a GET request

    # Create the information to include in the final report table
    dst_column_names = upload_data['dst_column_names']
    src_selected_key = upload_data['src_selected_key']

    # Triplets to show in the page (dst column, Boolean saying there is some
    # change, and the message on the src column)
    autorename_column_names = upload_data['autorename_column_names']
    rename_column_names = upload_data['rename_column_names']
    info = []
    initial_column_names = upload_data['initial_column_names']

    # Create the strings to show in the table for each of the rows explaining
    # what is going to be the effect of the merge operation over them.
    override_columns_names = set()
    for idx, (x, y, z) in enumerate(
            zip(initial_column_names,
                rename_column_names,
                upload_data['columns_to_upload'])):
        # There are several possible cases
        #
        # 1) The unique key. No message needed because it is displayed at
        #    the top of the rows
        #
        # 2) The column has not been selected. Simply show (Ignored) on the
        #    right.
        #
        # 3) Column is selected and is NEW
        #
        # 4) Column is selected and was renamed by the user
        #
        # 5) Column is selected and was automatically renamed by the tool
        #    when requesting to preserve the overlapping columns

        # CASE 1: It is the key (compare the renamed value in case the user
        # tried to rename it)
        if y == src_selected_key:
            continue

        # CASE 2: Column not selected, thus simply print "(Ignored)"
        if not z:
            info.append(('', False, x + ' (Ignored)'))
            continue

        # Calculate the final name after the renaming
        final_name = x
        suffix = ''

        # Logic to figure out the final name after renaming
        if y != x:
            # The corresponding name in rename_column_names is different, so
            # the user renamed the column
            final_name = y
            # To add to the column
            suffix = ', Renamed'

            # If the autorename list exists and the new name is different,
            # rename again
            if autorename_column_names and autorename_column_names[idx] != y:
                final_name = autorename_column_names[idx]
                suffix = ', Automatically renamed'
        else:
            # Check if there was an autorename
            if autorename_column_names and \
                    autorename_column_names[idx] != x:
                final_name = autorename_column_names[idx]
                suffix = ', Automatically renamed'

        if final_name in dst_column_names:
            suffix = ' (Override' + suffix + ')'
            override_columns_names.add(final_name)
        else:
            suffix = ' (New' + suffix + ')'

        info.append((final_name + suffix, True, x))

    # Store the value in the request object and update
    upload_data['override_columns_names'] = list(override_columns_names)
    request.session['upload_data'] = upload_data

    return render(request,
                  'dataops/upload_s4.html',
                  {'prev_step': reverse('dataops:upload_s3'),
                   'info': info,
                   'next_name': 'Finish'})
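

# ---------------------------------------------------------------------------
# A minimal sketch of the renaming logic above as a pure helper, useful for
# checking the suffix computation in isolation. The function name and
# signature are illustrative assumptions, not part of the module API.
def final_name_and_suffix(idx, initial, renamed, autorenamed,
                          dst_column_names):
    """Compute (final name, suffix) for the column at position idx.

    :param idx: position of the column in the lists below
    :param initial: list of initial column names
    :param renamed: list of user-renamed column names
    :param autorenamed: list of automatically renamed names (or None)
    :param dst_column_names: column names already present in DST
    """
    final_name = initial[idx]
    suffix = ''

    if renamed[idx] != initial[idx]:
        # Renamed by the user; an autorename may still override it
        final_name = renamed[idx]
        suffix = ', Renamed'
        if autorenamed and autorenamed[idx] != renamed[idx]:
            final_name = autorenamed[idx]
            suffix = ', Automatically renamed'
    elif autorenamed and autorenamed[idx] != initial[idx]:
        final_name = autorenamed[idx]
        suffix = ', Automatically renamed'

    if final_name in dst_column_names:
        return final_name, ' (Override' + suffix + ')'
    return final_name, ' (New' + suffix + ')'

# For example, a column the tool auto-renamed to avoid overlap:
# final_name_and_suffix(0, ['score'], ['score'], ['score_1'], ['score'])
# -> ('score_1', ' (New, Automatically renamed)')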