def csvupload1(request):
    """
    Step 1 of the process to read data into the platform from a CSV file.

    The four step process will populate the following dictionary with name
    upload_data (divided by steps in which they are set

    STEP 1:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL name of the first step

    :param request: Web request
    :return: On success, stores the upload_data dictionary in the session and
             redirects to 'dataops:upload_s2'. Otherwise the upload form is
             rendered again (with the detected error attached to the 'file'
             field), or the user is redirected to the workflow index when
             there is no current workflow.
    """

    # Get the current workflow
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Bind the form with the received data
    form = UploadCSVFileForm(request.POST or None, request.FILES or None)

    # Single context for every rendering of the form, so that the error
    # paths receive exactly the same values (including 'wid') as the initial
    # rendering.
    form_context = {'form': form,
                    'wid': workflow.id,
                    'dtype': 'CSV',
                    'dtype_select': 'CSV file',
                    'prev_step': reverse('dataops:list')}

    # Process the initial loading of the form
    if request.method != 'POST':
        return render(request, 'dataops/upload1.html', form_context)

    # Process the reception of the file
    if not form.is_multipart():
        msg = "CSV upload form is not multiform"
        error_context = {'message': msg}

        meta = request.META.get('HTTP_REFERER', None)
        if meta:
            error_context['meta'] = meta
        return render(request, 'critical_error.html', context=error_context)

    # If not valid, this is probably because the file submitted was too big
    if not form.is_valid():
        return render(request, 'dataops/upload1.html', form_context)

    # Process CSV file using pandas read_csv
    try:
        data_frame = pandas_db.load_df_from_csvfile(
            request.FILES['file'],
            form.cleaned_data['skip_lines_at_top'],
            form.cleaned_data['skip_lines_at_bottom'])
    except Exception as e:
        # Format the exception itself: the "message" attribute is not
        # available in Python 3 exceptions.
        form.add_error('file',
                       'File could not be processed ({0})'.format(e))
        return render(request, 'dataops/upload1.html', form_context)

    # If the frame has repeated column names, it will not be processed.
    if len(set(data_frame.columns)) != len(data_frame.columns):
        # Iterate over (name, count) pairs. Iterating the Counter directly
        # only yields the names.
        name_counts = Counter(list(data_frame.columns))
        dup = [name for name, count in name_counts.items() if count > 1]
        form.add_error(
            'file',
            'The file has duplicated column names (' + ','.join(dup) + ').')
        return render(request, 'dataops/upload1.html', form_context)

    # If the data frame does not have any unique key, it is not useful (no
    # way to uniquely identify rows). There must be at least one.
    src_is_key_column = ops.are_unique_columns(data_frame)
    if not any(src_is_key_column):
        form.add_error(
            'file',
            'The data has no column with unique values per row. '
            'At least one column must have unique values.')
        return render(request, 'dataops/upload1.html', form_context)

    # Store the data frame in the DB.
    try:
        # Get frame info with three lists: names, types and is_key
        frame_info = ops.store_upload_dataframe_in_db(data_frame,
                                                      workflow.id)
    except Exception:
        form.add_error(
            'file',
            'Sorry. This file cannot be processed.'
        )
        return render(request, 'dataops/upload1.html', form_context)

    # Dictionary to populate gradually throughout the sequence of steps. It
    # is stored in the session.
    # NOTE(review): step_1 holds a URL *name* here, while excelupload1 and
    # sqlupload1 store the result of reverse(...). Confirm which of the two
    # the following steps expect.
    request.session['upload_data'] = {
        'initial_column_names': frame_info[0],
        'column_types': frame_info[1],
        'src_is_key_column': frame_info[2],
        'step_1': 'dataops:csvupload1'
    }

    return redirect('dataops:upload_s2')
def excelupload1(request):
    """
    Step 1 of the whole process to read data into the platform (Excel file).

    The four step process will populate the following dictionary with name
    upload_data (divided by steps in which they are set

    STEP 1:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL of the first step

    :param request: Web request
    :return: On success, stores the upload_data dictionary in the session and
             redirects to 'dataops:upload_s2'. Otherwise the upload form is
             rendered again (with the detected error attached to the 'file'
             field), or the user is redirected to the workflow index when
             there is no current workflow.
    """

    # Get the current workflow
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Bind the form with the received data
    form = UploadExcelFileForm(request.POST or None, request.FILES or None)

    # Single context for every rendering of the form, so that the error
    # paths receive exactly the same values (including 'wid') as the initial
    # rendering.
    form_context = {'form': form,
                    'wid': workflow.id,
                    'dtype': 'Excel',
                    'dtype_select': 'Excel file',
                    'prev_step': reverse('dataops:uploadmerge')}

    # Process the initial loading of the form
    if request.method != 'POST':
        return render(request, 'dataops/upload1.html', form_context)

    # Process the reception of the file
    if not form.is_multipart():
        msg = _("Excel upload form is not multiform")
        error_context = {'message': msg}

        meta = request.META.get('HTTP_REFERER', None)
        if meta:
            error_context['meta'] = meta
        return render(request, 'critical_error.html', context=error_context)

    # If not valid, this is probably because the file submitted was too big
    if not form.is_valid():
        return render(request, 'dataops/upload1.html', form_context)

    # Process Excel file using pandas read_excel
    try:
        data_frame = pd.read_excel(
            request.FILES['file'],
            sheet_name=form.cleaned_data['sheet'],
            index_col=False,
            infer_datetime_format=True,
            quotechar='"',
        )

        # Strip white space from all string columns and try to convert to
        # datetime just in case
        for column_name in list(data_frame.columns):
            if data_frame[column_name].dtype.name == 'object':
                # Column is a string!
                data_frame[column_name] = \
                    data_frame[column_name].str.strip()

                # Try the datetime conversion
                try:
                    series = pd.to_datetime(data_frame[column_name],
                                            infer_datetime_format=True)
                    # Datetime conversion worked! Update the data_frame
                    data_frame[column_name] = series
                except ValueError:
                    # Not a date column, keep the stripped strings
                    pass
    except Exception as e:
        # Format the exception itself: the "message" attribute is not
        # available in Python 3 exceptions.
        form.add_error('file',
                       _('File could not be processed ({0})').format(e))
        return render(request, 'dataops/upload1.html', form_context)

    # If the frame has repeated column names, it will not be processed.
    if len(set(data_frame.columns)) != len(data_frame.columns):
        # Iterate over (name, count) pairs. Iterating the Counter directly
        # only yields the names.
        name_counts = Counter(list(data_frame.columns))
        dup = [name for name, count in name_counts.items() if count > 1]
        form.add_error(
            'file',
            _('The file has duplicated column names') +
            ' (' + ','.join(dup) + ').')
        return render(request, 'dataops/upload1.html', form_context)

    # If the data frame does not have any unique key, it is not useful (no
    # way to uniquely identify rows). There must be at least one.
    src_is_key_column = ops.are_unique_columns(data_frame)
    if not any(src_is_key_column):
        form.add_error(
            'file',
            _('The data has no column with unique values per row. '
              'At least one column must have unique values.'))
        return render(request, 'dataops/upload1.html', form_context)

    # Store the data frame in the DB.
    try:
        # Get frame info with three lists: names, types and is_key
        frame_info = ops.store_upload_dataframe_in_db(data_frame,
                                                      workflow.id)
    except Exception:
        form.add_error(
            'file',
            _('Sorry. This file cannot be processed.')
        )
        return render(request, 'dataops/upload1.html', form_context)

    # Dictionary to populate gradually throughout the sequence of steps. It
    # is stored in the session.
    request.session['upload_data'] = {
        'initial_column_names': frame_info[0],
        'column_types': frame_info[1],
        'src_is_key_column': frame_info[2],
        'step_1': reverse('dataops:excelupload1')
    }

    return redirect('dataops:upload_s2')
def sqlupload1(request, pk):
    """
    Step 1 of the process to read data into the platform from a SQL
    connection.

    The four step process will populate the following dictionary with name
    upload_data (divided by steps in which they are set

    STEP 1:

    initial_column_names: List of column names in the initial file.

    column_types: List of column types as detected by pandas

    src_is_key_column: Boolean list with src columns that are unique

    step_1: URL of the first step

    :param request: Web request
    :param pk: Primary key of the SQLConnection object to use
    :return: On success, stores the upload_data dictionary in the session and
             redirects to 'dataops:upload_s2'. Otherwise the connection page
             is rendered again with an error message, or the user is
             redirected when there is no current workflow or no connection
             with the given pk.
    """

    # Get the current workflow
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # Get the connection
    conn = SQLConnection.objects.filter(pk=pk).first()
    if not conn:
        return redirect('dataops:sqlconns')

    # The form only exists when the connection needs a password to operate
    form = None
    if conn.db_password:
        form = SQLRequestPassword(request.POST or None)

    context = {
        'form': form,
        'wid': workflow.id,
        'dtype': 'SQL',
        'dtype_select': 'SQL connection',
        'prev_step': reverse('dataops:sqlconns'),
        'conn_type': conn.conn_type,
        'conn_driver': conn.conn_driver,
        'db_user': conn.db_user,
        # Never send the stored password to the template
        'db_passwd': '<PROTECTED>' if conn.db_password else '',
        'db_host': conn.db_host,
        'db_port': conn.db_port,
        'db_name': conn.db_name,
        'db_table': conn.db_table
    }

    # Process the initial loading of the form
    if request.method != 'POST' or (form and not form.is_valid()):
        return render(request, 'dataops/sqlupload1.html', context)

    read_pwd = None
    if form:
        read_pwd = form.cleaned_data['password']

    # Process SQL connection using pandas
    try:
        data_frame = pandas_db.load_df_from_sqlconnection(conn, read_pwd)
    except Exception as e:
        # Format the exception itself: the "message" attribute is not
        # available in Python 3 exceptions.
        messages.error(request,
                       'Unable to obtain data: {0}'.format(e))
        # Render the SQL page, the one matching the context built above and
        # used by every other return in this view.
        return render(request, 'dataops/sqlupload1.html', context)

    # If the frame has repeated column names, it will not be processed.
    if len(set(data_frame.columns)) != len(data_frame.columns):
        # Iterate over (name, count) pairs. Iterating the Counter directly
        # only yields the names.
        name_counts = Counter(list(data_frame.columns))
        dup = [name for name, count in name_counts.items() if count > 1]
        messages.error(
            request,
            'The data frame has duplicated column names (' +
            ','.join(dup) + ').')
        return render(request, 'dataops/sqlupload1.html', context)

    # If the data frame does not have any unique key, it is not useful (no
    # way to uniquely identify rows). There must be at least one.
    src_is_key_column = ops.are_unique_columns(data_frame)
    if not any(src_is_key_column):
        messages.error(
            request,
            'The data has no column with unique values per row. '
            'At least one column must have unique values.')
        return render(request, 'dataops/sqlupload1.html', context)

    # Store the data frame in the DB.
    try:
        # Get frame info with three lists: names, types and is_key
        frame_info = ops.store_upload_dataframe_in_db(data_frame,
                                                      workflow.id)
    except Exception:
        error_msg = \
            'Sorry. The data from this connection cannot be processed.'
        if form is not None:
            form.add_error(None, error_msg)
        else:
            # Connections without password have no form to attach the
            # error to, so report it through the messages framework.
            messages.error(request, error_msg)
        return render(request, 'dataops/sqlupload1.html', context)

    # Dictionary to populate gradually throughout the sequence of steps. It
    # is stored in the session.
    request.session['upload_data'] = {
        'initial_column_names': frame_info[0],
        'column_types': frame_info[1],
        'src_is_key_column': frame_info[2],
        'step_1': reverse('dataops:sqlupload1', kwargs={'pk': conn.id})
    }

    return redirect('dataops:upload_s2')