def test_table_pandas_merge_to_outer_NaN(self):
    """Outer merge through the pandas API and compare with a local merge.

    Uploads src_df2 with how='outer' and checks that the resulting
    workflow table matches the outcome of performing the same pd.merge
    locally.
    """
    # Get the only workflow in the fixture
    workflow = Workflow.objects.all()[0]

    # Drop the key flag from 'age' and 'email' so they do not constrain
    # the merge
    age = workflow.columns.filter(name='age')[0]
    age.is_key = False
    age.save()
    email = workflow.columns.filter(name='email')[0]
    email.is_key = False
    email.save()

    # Drop the column with booleans because the data type is lost
    workflow_delete_column(
        workflow,
        workflow.columns.get(name='registered'))

    # Transform new table into string
    r_df = pd.DataFrame(self.src_df2)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Expected result: the same outer merge performed locally
    new_df = pd.merge(df, r_df, how="outer", left_on="sid", right_on="sid")

    # Get the data through the API
    response = self.client.put(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
        {
            "src_df": df_to_string(r_df),
            "how": "outer",
            "left_on": "sid",
            "right_on": "sid"
        },
        format='json')

    # Get the new workflow
    workflow = Workflow.objects.all()[0]

    # Result should have four rows and eight columns after the outer merge
    self.assertEqual(workflow.nrows, 4)
    self.assertEqual(workflow.ncols, 8)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Compare both elements and check wf df consistency
    self.compare_tables(df, new_df)

    # Check for df/wf consistency
    self.assertTrue(check_wf_df(workflow))
def test_table_pandas_update(self):
    """Replace the workflow table through the pandas API and verify it."""
    # Single workflow present in the fixture
    workflow = Workflow.objects.all()[0]

    # Build the replacement table and detect its datetime columns
    uploaded = detect_datetime_columns(pd.DataFrame(self.new_table))

    # PUT the serialized frame to the pandas ops endpoint
    self.client.put(
        reverse('table:api_pops', kwargs={'wid': workflow.id}),
        {'data_frame': df_to_string(uploaded)},
        format='json')

    # The workflow has changed in the DB; fetch a fresh copy
    workflow = Workflow.objects.get(id=workflow.id)

    # The stored table must match what was uploaded
    stored = load_table(workflow.get_data_frame_table_name())
    self.compare_tables(uploaded, stored)

    # Workflow/df metadata must remain consistent
    workflow = Workflow.objects.get(id=workflow.id)
    self.assertTrue(check_wf_df(workflow))
def test_table_JSON_merge_to_left(self):
    """Left merge through the JSON API and check the merged column."""
    # Get the only workflow in the fixture
    workflow = Workflow.objects.all()[0]

    # Drop the key flag from 'age' and 'email' so they do not constrain
    # the merge
    age = workflow.columns.filter(name='age')[0]
    age.is_key = False
    age.save()
    email = workflow.columns.filter(name='email')[0]
    email.is_key = False
    email.save()

    # Get the data through the API
    response = self.client.put(
        reverse('table:api_merge', kwargs={'wid': workflow.id}),
        {
            "src_df": self.src_df,
            "how": "left",
            "left_on": "sid",
            "right_on": "sid"
        },
        format='json')

    # Get the new workflow
    workflow = Workflow.objects.all()[0]

    # Result should have three rows as the initial DF
    self.assertEqual(workflow.nrows, 3)

    # The merged column must carry the value from the source frame
    df = load_table(workflow.get_data_frame_table_name())
    self.assertEqual(
        df[df['sid'] == 1]['newcol'].values[0],
        self.src_df['newcol'][0])

    # Check for df/wf consistency
    self.assertTrue(check_wf_df(workflow))
def workflow_restrict_column(column: Column) -> Optional[str]: """Set category of the column to the existing set of values. Given a workflow and a column, modifies the column so that only the values already present are allowed for future updates. :param column: Column object to restrict :return: String with error or None if correct """ # Load the data frame data_frame = load_table(column.workflow.get_data_frame_table_name()) cat_values = set(data_frame[column.name].dropna()) if not cat_values: # Column has no meaningful values. Nothing to do. return _('Column has no meaningful values') # Set categories column.set_categories(list(cat_values)) column.save() # Re-evaluate the operands in the workflow column.workflow.set_query_builder_ops() column.workflow.save() # Correct execution return None
def test_table_pandas_merge_to_left(self):
    """Left merge through the pandas API and check the merged column."""
    # Single workflow present in the fixture
    workflow = Workflow.objects.all()[0]

    # Serialize the source frame for the request payload
    src_frame = pd.DataFrame(self.src_df)

    # Issue the merge request
    self.client.put(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
        {
            "src_df": df_to_string(src_frame),
            "how": "left",
            "left_on": "sid",
            "right_on": "sid"
        },
        format='json')

    # Re-fetch the workflow after the merge
    workflow = Workflow.objects.all()[0]

    # A left merge keeps the three original rows
    self.assertEqual(workflow.nrows, 3)

    # The new column must carry the value from the source frame
    merged = load_table(workflow.get_data_frame_table_name())
    row_one = merged[merged['sid'] == 1]
    self.assertEqual(row_one['newcol'].values[0], self.src_df['newcol'][0])

    # Workflow/df metadata must remain consistent
    self.assertTrue(check_wf_df(workflow))
def test_table_pandas_create(self):
    """Create a second workflow and upload its first table via the API."""
    # Create a second workflow
    response = self.client.post(
        reverse('workflow:api_workflows'),
        {
            'name': test.wflow_name + '2',
            'attributes': {'one': 'two'}
        },
        format='json')

    # Get the newly created workflow (id returned by the API)
    workflow = Workflow.objects.get(id=response.data['id'])

    # Transform new table into a data frame
    r_df = pd.DataFrame(self.new_table)
    r_df = detect_datetime_columns(r_df)

    # Upload the table
    response = self.client.post(
        reverse('table:api_pops', kwargs={'wid': workflow.id}),
        {'data_frame': df_to_string(r_df)},
        format='json')

    # Refresh wflow (has been updated)
    workflow = Workflow.objects.get(id=workflow.id)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Compare both elements
    self.compare_tables(r_df, df)

    # Check that the rest of the information is correct
    self.assertTrue(check_wf_df(workflow))
def put( self, request: HttpRequest, wid: int, format=None, workflow: Optional[Workflow] = None, ) -> HttpResponse: """Process the put request.""" # Get the dst_df dst_df = load_table(workflow.get_data_frame_table_name()) serializer = self.serializer_class(data=request.data) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) # Check that the parameters are correct how = serializer.validated_data['how'] if how == '' or how not in ['left', 'right', 'outer', 'inner']: raise APIException( _('how must be one of left, right, outer or inner')) left_on = serializer.validated_data['left_on'] if not is_unique_column(dst_df[left_on]): raise APIException( _('column {0} does not contain a unique key.').format(left_on)) # Operation has been accepted by the serializer src_df = serializer.validated_data['src_df'] right_on = serializer.validated_data['right_on'] if right_on not in list(src_df.columns): raise APIException( _('column {0} not found in data frame').format(right_on)) if not is_unique_column(src_df[right_on]): raise APIException( _('column {0} does not contain a unique key.').format( right_on)) merge_info = { 'how_merge': how, 'dst_selected_key': left_on, 'src_selected_key': right_on, 'initial_column_names': list(src_df.columns), 'rename_column_names': list(src_df.columns), 'columns_to_upload': [True] * len(list(src_df.columns)), } # Ready to perform the MERGE try: perform_dataframe_upload_merge(workflow, dst_df, src_df, merge_info) except Exception as exc: raise APIException( _('Unable to perform merge operation: {0}').format(str(exc))) # Merge went through. return Response(serializer.data, status=status.HTTP_201_CREATED)
def get(
    self,
    request: HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Return the current workflow table as a serialized data frame."""
    current_df = load_table(workflow.get_data_frame_table_name())
    payload = self.serializer_class({'data_frame': current_df})
    return Response(payload.data)
def column_restrict_values( request: HttpRequest, pk: int, workflow: Optional[Workflow] = None, column: Optional[Column] = None, ) -> JsonResponse: """Restrict future values in this column to one of those already present. :param request: HTTP request :param pk: ID of the column to restrict. The workflow element is taken from the session. :return: Render the delete column form """ # If the columns is unique and it is the only one, we cannot allow # the operation if column.is_key: # This is the only key column messages.error(request, _('You cannot restrict a key column')) return JsonResponse({'html_redirect': reverse('workflow:detail')}) # Get the name of the column to delete context = {'pk': pk, 'cname': column.name} # Get the values from the data frame df = load_table(workflow.get_data_frame_table_name()) context['values'] = ', '.join(set(df[column.name])) if request.method == 'POST': # Proceed restricting the column error_txt = workflow_restrict_column(column) if isinstance(error_txt, str): # Something went wrong. Show it messages.error(request, error_txt) # Log the event Log.objects.register( request.user, Log.COLUMN_RESTRICT, workflow, { 'id': workflow.id, 'name': workflow.name, 'column_name': column.name, 'values': context['values'] }) return JsonResponse({'html_redirect': reverse('workflow:detail')}) return JsonResponse({ 'html_form': render_to_string('workflow/includes/partial_column_restrict.html', context, request=request), })
def do_sql_txt_operand(
    self,
    input_value,
    op_value,
    type_value,
    value,
    row_yes=1,
    row_no=1,
):
    """Evaluate a SQL text operand in its plain and negated forms.

    Builds the formula skeleton, loads the filtered table, and checks the
    expected row counts (row_yes for the plain operand, row_no for the
    negated one when the operand template has a negation slot).
    """
    # Plain form of the operand
    self.set_skel(
        input_value,
        op_value.format(''),
        type_value,
        value,
        'v_' + type_value)
    frame = load_table(self.test_table, self.test_columns, self.skel)
    self.assertEqual(frame.shape[0], row_yes)
    evaluate_formula(self.skel, EVAL_TXT)

    # Negated form only exists when the operand template has a slot
    if '{0}' in op_value:
        self.set_skel(
            input_value,
            op_value.format('not_'),
            type_value,
            value,
            'v_' + type_value)
        frame = load_table(self.test_table, self.test_columns, self.skel)
        self.assertEqual(frame.shape[0], row_no)
        evaluate_formula(self.skel, EVAL_TXT)
def test_df_equivalent_after_sql(self):
    """A data frame stored to SQL and re-loaded stays identical."""
    # Parse the CSV into the source frame
    source = load_df_from_csvfile(io.StringIO(self.csv1), 0, 0)

    # Round-trip through the database
    store_table(source, self.table_name)
    restored = load_table(self.table_name)

    # Loaded datetimes come back in UTC; convert back to the original tz
    for date_col in ('date1', 'date2'):
        restored[date_col] = restored[date_col].dt.tz_convert(
            'Australia/Adelaide')

    # Data frames must be identical
    assert source.equals(restored)
def post(
    self,
    request: HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Create the workflow table; reject if one already exists."""
    existing = load_table(workflow.get_data_frame_table_name())
    if existing is not None:
        raise APIException(
            _('Post request requires workflow without a table'))
    # Delegate the actual creation to the shared override handler
    return self.override(request, wid=wid, format=format, workflow=workflow)
def test_column_clone(self):
    """Clone a column and check the clone equals the original."""
    column = self.workflow.columns.get(name='Q01')

    # GET the clone form
    resp = self.get_response(
        'workflow:column_clone',
        {'pk': column.id},
        is_ajax=True)
    self.assertTrue(status.is_success(resp.status_code))

    # POST to create the clone
    resp = self.get_response(
        'workflow:column_clone',
        {'pk': column.id},
        method='POST',
        is_ajax=True)
    self.assertTrue(status.is_success(resp.status_code))

    # The cloned column must hold exactly the same values
    df = load_table(self.workflow.get_data_frame_table_name())
    self.assertTrue(df['Copy of Q01'].equals(df['Q01']))
def test_table_pandas_get(self):
    """GET on the pandas table API returns the stored data frame."""
    # Single workflow present in the fixture
    workflow = Workflow.objects.all()[0]

    # Fetch the serialized table through the API
    resp = self.client.get(
        reverse('table:api_pops', kwargs={'wid': workflow.id}))
    api_frame = string_to_df(resp.data['data_frame'])

    # Must match the frame stored in the database
    db_frame = load_table(workflow.get_data_frame_table_name())
    self.compare_tables(api_frame, db_frame)
def parse_data_frames(self):
    """Return (df_dst, df_src) for the merge tests.

    df_dst comes from the workflow table when one is set, otherwise from
    csv1; df_src always comes from csv2. The merge_info column fields are
    refreshed from the source frame.
    """
    # Parse the two CSV strings and return as data frames
    if self.workflow:
        # Get the workflow data frame
        df_dst = load_table(self.workflow.get_data_frame_table_name())
    else:
        df_dst = load_df_from_csvfile(io.StringIO(self.csv1), 0, 0)

    df_src = load_df_from_csvfile(io.StringIO(self.csv2), 0, 0)

    # Fix the merge_info fields.
    # NOTE(review): columns_to_upload is set to the column NAMES here,
    # while other call sites use a list of booleans ([True] * n) --
    # confirm which shape perform_dataframe_upload_merge expects.
    self.merge_info['initial_column_names'] = list(df_src.columns)
    self.merge_info['rename_column_names'] = list(df_src.columns)
    self.merge_info['columns_to_upload'] = list(df_src.columns)

    return df_dst, df_src
def test_04_merge_right(self): self.template_merge('right', rename=False) # Assert the content of the dataframe wflow = Workflow.objects.get(name=self.wf_name) df = load_table(wflow.get_data_frame_table_name()) self.assertTrue('key' in set(df.columns)) self.assertTrue('text3' in set(df.columns)) self.assertTrue('double3' in set(df.columns)) self.assertTrue('bool3' in set(df.columns)) self.assertTrue('date3' in set(df.columns)) assert check_wf_df(Workflow.objects.get(name='Testing Merge')) # End of session self.logout()
def test_merge_inner(self):
    """Perform an inner merge and verify it completes without error."""
    # Get the workflow
    self.workflow = Workflow.objects.all()[0]

    # Parse the source data frame
    df_dst, df_src = self.parse_data_frames()

    self.merge_info['how_merge'] = 'inner'

    result = perform_dataframe_upload_merge(
        self.workflow,
        df_dst,
        df_src,
        self.merge_info)

    # Load again the workflow data frame
    df_dst = load_table(self.workflow.get_data_frame_table_name())

    # Result must be None on success.
    # Fix: assertEquals is a deprecated alias (removed in Python 3.12);
    # assertIsNone states the intent directly.
    self.assertIsNone(result)
def test_tracking(self): """Test that tracking hits are properly stored.""" # Repeat the checks two times to test if they are accumulating for idx in range(1, 3): # Iterate over the tracking items for trck in self.trck_tokens: self.client.get(reverse('trck') + '?v=' + trck) # Get the workflow and the data frame workflow = Workflow.objects.get(name=self.wflow_name) df = load_table(workflow.get_data_frame_table_name()) # Check that the results have been updated in the DB (to 1) for uemail in [ x[1] for x in test.user_info if x[1].startswith('student') ]: self.assertEqual( int(df.loc[df['email'] == uemail, 'EmailRead_1'].values[0]), idx)
def test_table_pandas_merge_get(self):
    """GET on the pandas merge API returns the workflow table as src_df."""
    # Get the only workflow in the fixture
    workflow = Workflow.objects.all()[0]

    # Get the data through the API
    response = self.client.get(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}))

    workflow = Workflow.objects.all()[0]

    # Deserialize the frame returned by the API
    r_df = string_to_df(response.data['src_df'])

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Compare both elements and check wf df consistency
    self.compare_tables(r_df, df)
    self.assertTrue(check_wf_df(workflow))
def forwards(apps, schema_editor):
    """Migration: align Column.position with the data frame column order.

    :param apps: Historical app registry
    :param schema_editor: Schema editor (used only to check the db alias)
    """
    # Only run against the default database
    if schema_editor.connection.alias != 'default':
        return

    # Traverse the workflows and verify that the columns are in the same
    # order than the columns in the workflow
    Workflow = apps.get_model('workflow', 'Workflow')
    for w in Workflow.objects.all():
        # NOTE(review): is_table_in_db is called with the workflow as its
        # own argument, and load_table receives the id rather than a table
        # name -- both look like calls against an older API; confirm this
        # matches the helpers available at this migration's point in time.
        if not w.is_table_in_db(w):
            continue

        df = load_table(w.id)
        if len(df.columns) != w.ncols:
            raise Exception('Inconsistent number of columns')

        # Positions are 1-based
        df_columns = list(df.columns)
        for c in w.columns.all():
            c.position = df_columns.index(c.name) + 1
            c.save()
def get(
    self,
    request: HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Return the workflow table plus blank merge parameters."""
    # Try to retrieve the wflow to check for permissions
    table_frame = load_table(workflow.get_data_frame_table_name())
    payload = self.serializer_class({
        'src_df': table_frame,
        'how': '',
        'left_on': '',
        'right_on': '',
    })
    return Response(payload.data)
def test_random_column_add(self): """Test adding a random column.""" # GET the form resp = self.get_response('workflow:random_column_add', is_ajax=True) self.assertTrue(status.is_success(resp.status_code)) # Create the new question resp = self.get_response('workflow:random_column_add', method='POST', req_params={ 'name': 'RANDOM COLUMN', 'description_text': 'RANDOM COLUMN DESC', 'data_type': 'integer', 'position': '0', 'column_values': '12' }, is_ajax=True) self.assertTrue(status.is_success(resp.status_code)) df = load_table(self.workflow.get_data_frame_table_name()) self.assertTrue(all(0 < num < 13 for num in df['RANDOM COLUMN']))
def test_formula_column_add(self): """Test adding a formula column.""" # GET the form resp = self.get_response('workflow:formula_column_add', is_ajax=True) self.assertTrue(status.is_success(resp.status_code)) # Create the new question resp = self.get_response('workflow:formula_column_add', method='POST', req_params={ 'name': 'FORMULA COLUMN', 'description_text': 'FORMULA COLUMN DESC', 'data_type': 'integer', 'position': '0', 'columns': ['12', '13'], 'op_type': 'sum' }, is_ajax=True) self.assertTrue(status.is_success(resp.status_code)) df = load_table(self.workflow.get_data_frame_table_name()) self.assertTrue(df['FORMULA COLUMN'].equals(df['Q01'] + df['Q02']))
def _get_column_visualization_items( workflow, column, max_width=VISUALIZATION_WIDTH, max_height=VISUALIZATION_HEIGHT, ): """Get the visualization items (scripts and HTML) for a column. :param workflow: Workflow being processed :param column: Column requested :param max_width: Maximum width attribute in pixels :param max_width: Maximum height attribute in pixels :return: Tuple stat_data with descriptive stats, visualization scripts and visualization HTML """ # Get the dataframe df = load_table(workflow.get_data_frame_table_name()) # Extract the data to show at the top of the page stat_data = get_column_statistics(df[column.name]) vis_scripts = [] visualizations = _get_column_visualisations( column, df[[column.name]], vis_scripts, context={ 'style': 'max-width:{0}px; max-height:{1}px;'.format( max_width, max_height, ) + 'display:inline-block;', }, ) return stat_data, vis_scripts, visualizations
def test_05_merge_outer_fail(self): self.template_merge('outer') # Assert that the error is at the top of the page self.assertIn( 'Merge operation produced a result without any key columns.', self.selenium.page_source ) # Assert the content of the dataframe wflow = Workflow.objects.get(name=self.wf_name) df = load_table(wflow.get_data_frame_table_name()) self.assertTrue('key' in set(df.columns)) self.assertTrue('key2' not in set(df.columns)) self.assertTrue('text3' not in set(df.columns)) self.assertTrue('double3' not in set(df.columns)) self.assertTrue('bool3' not in set(df.columns)) self.assertTrue('date3' not in set(df.columns)) assert check_wf_df(Workflow.objects.get(name='Testing Merge')) # End of session self.logout()
def run_plugin(
    workflow,
    plugin_info,
    input_column_names,
    output_column_names,
    output_suffix,
    merge_key,
    plugin_params,
):
    """Execute the run method in the plugin.

    Execute the run method in a plugin with the dataframe from the given
    workflow, validate the result, and merge it back into the workflow
    table.

    :param workflow: Workflow object being processed
    :param plugin_info: Plugin registry object being processed
    :param input_column_names: List of input column names
    :param output_column_names: List of output column names
    :param output_suffix: Suffix that is added to the output column names
    :param merge_key: Key column to use in the merge
    :param plugin_params: Dictionary with the parameters to execute the
        plug in
    :return: True on success; raises Exception on any failure
    """
    plugin_instance, msgs = load_plugin(plugin_info.filename)
    if plugin_instance is None:
        raise Exception(
            ugettext('Unable to instantiate plugin "{0}"').format(
                plugin_info.name),
        )

    # Check that the list of given inputs is consistent: if plugin has a
    # list of inputs, it has to have the same length as the given list.
    if (plugin_instance.get_input_column_names()
        and len(plugin_instance.get_input_column_names())
            != len(input_column_names)):
        raise Exception(
            ugettext(
                'Inconsistent number of inputs when invoking plugin "{0}"',
            ).format(plugin_info.name),
        )

    # Check that the list of given outputs has the same length as the list
    # of outputs proposed by the plugin
    if (plugin_instance.get_output_column_names()
        and len(plugin_instance.get_output_column_names())
            != len(output_column_names)):
        raise Exception(
            ugettext(
                'Inconsistent number of outputs when invoking plugin "{0}"',
            ).format(plugin_info.name),
        )

    # Get the data frame from the workflow
    try:
        df = load_table(workflow.get_data_frame_table_name())
    except Exception as exc:
        raise Exception(
            ugettext(
                'Exception when retrieving the data frame from workflow: {0}',
            ).format(str(exc)),
        )

    # Set the updated names of the input, output columns, and the suffix
    if not plugin_instance.get_input_column_names():
        plugin_instance.input_column_names = input_column_names
    plugin_instance.output_column_names = output_column_names
    plugin_instance.output_suffix = output_suffix

    # Create a new dataframe with the given input columns, and rename them
    # if needed
    try:
        sub_df = pd.DataFrame(df[input_column_names])
        if plugin_instance.get_input_column_names():
            sub_df.columns = plugin_instance.get_input_column_names()
    except Exception as exc:
        raise Exception(
            ugettext('Error when creating data frame for plugin: {0}').format(
                str(exc)))

    # Try the execution and catch any exception
    try:
        new_df = plugin_instance.run(sub_df, parameters=plugin_params)
    except Exception as exc:
        raise Exception(
            ugettext('Error while executing plugin: {0}').format(str(exc)),
        )

    # If plugin does not return a data frame, flag as error
    if not isinstance(new_df, pd.DataFrame):
        raise Exception(
            ugettext(
                'Plugin executed but did not return a pandas data frame.'),
        )

    # Execution is DONE. Now we have to perform various additional checks

    # Result has to have the exact same number of rows
    if new_df.shape[0] != df.shape[0]:
        raise Exception(
            ugettext(
                'Incorrect number of rows ({0}) in result data frame.',
            ).format(new_df.shape[0]),
        )

    # Merge key name cannot be part of the output df
    if merge_key in new_df.columns:
        raise Exception(
            ugettext(
                'Column name {0} cannot be in the result data frame.'.format(
                    merge_key)),
        )

    # Result column names are consistent
    if set(new_df.columns) != set(plugin_instance.get_output_column_names()):
        raise Exception(ugettext('Incorrect columns in result data frame.'))

    # Add the merge column to the result df (copied positionally from the
    # source frame)
    new_df[merge_key] = df[merge_key]

    # Proceed with the merge
    try:
        new_frame = perform_dataframe_upload_merge(
            workflow,
            df,
            new_df,
            {
                'how_merge': 'inner',
                'dst_selected_key': merge_key,
                'src_selected_key': merge_key,
                'initial_column_names': list(new_df.columns),
                'rename_column_names': list(new_df.columns),
                'columns_to_upload': [True] * len(list(new_df.columns)),
            },
        )
    except Exception as exc:
        raise Exception(
            ugettext('Error while merging result: {0}.').format(str(exc)),
        )

    # The merge helper may also report failure by returning a string
    if isinstance(new_frame, str):
        raise Exception(
            ugettext('Error while merging result: {0}.').format(new_frame))

    # Update execution time in the plugin
    plugin_info.executed = datetime.now(
        pytz.timezone(ontask_settings.TIME_ZONE),
    )
    plugin_info.save()

    return True
def stat_table_view(
    request: HttpRequest,
    pk: Optional[int] = None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Render the page with stats and visualizations for a view.

    :param request: HTTP request
    :param pk: View id to use; when absent the whole table is used
    :param workflow: Workflow object (injected by the view decorator)
    :return: Render the page
    """
    # If the workflow has no data, something went wrong, go back to the
    # workflow details page.
    if workflow.nrows == 0:
        messages.error(
            request,
            _('Unable to provide visualisation without data.'))
        # Fix: URL name was misspelled as 'worflow:detail', which raised
        # NoReverseMatch instead of redirecting.
        return redirect('workflow:detail', workflow.id)

    # If a view is given, filter the columns
    view = None
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found. Redirect to workflow detail
            return redirect('home')
        columns_to_view = view.columns.filter(is_key=False)

        df = load_table(
            workflow.get_data_frame_table_name(),
            [col.name for col in columns_to_view],
            view.formula)
    else:
        # No view given, fetch the entire data frame
        columns_to_view = workflow.columns.filter(is_key=False)
        df = load_table(workflow.get_data_frame_table_name())

    vis_scripts = []
    visualizations = []
    context = {'style': 'width:400px; height:225px;'}
    for idx, column in enumerate(columns_to_view):
        # Skip primary keys (no point to represent any of them)
        if column.is_key:
            continue

        # Add the title and surrounding container
        visualizations.append('<h4>' + column.name + '</h4>')

        # If all values are empty, no need to proceed
        if all(not col_data for col_data in df[column.name]):
            visualizations.append('<p>No values in this column</p><hr/>')
            continue

        visualizations.append('<div style="display: inline-flex;">')

        column_viz = _get_column_visualisations(
            column,
            df[[column.name]],
            vis_scripts=vis_scripts,
            viz_id='column_{0}'.format(idx),
            context=context)

        visualizations.extend([vis.html_content for vis in column_viz])
        visualizations.append('</div><hr/>')

    return render(
        request,
        'table/stat_view.html',
        {'view': view,
         'vis_scripts': vis_scripts,
         'visualizations': visualizations})
def stat_row_view(
    request: HttpRequest,
    pk: Optional[int] = None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Render stats for a row.

    Render the page with stats and visualizations for a row in the table
    and a view (subset of columns). The request must include key and value
    to get the right row. In principle, there is a visualisation for each
    row.

    :param request: HTTP request
    :param pk: View id to use
    :param workflow: Workflow object (injected by the view decorator)
    :return: Render the page
    """
    # Get the pair key,value to fetch the row from the table
    update_key = request.GET.get('key')
    update_val = request.GET.get('val')

    if not update_key or not update_val:
        # Malformed request
        return render(
            request,
            'error.html',
            {'message': _('Unable to visualize table row')})

    # If a view is given, filter the columns
    columns_to_view = workflow.columns.all()
    column_names = workflow.get_column_names()
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found. Redirect to home
            return redirect('home')
        columns_to_view = view.columns.all()
        column_names = [col.name for col in columns_to_view]

        df = load_table(
            workflow.get_data_frame_table_name(),
            column_names,
            view.formula)
    else:
        # No view given, fetch the entire data frame
        df = load_table(workflow.get_data_frame_table_name())

    # Get the row from the table
    row = get_rows(
        workflow.get_data_frame_table_name(),
        column_names=column_names,
        filter_pairs={update_key: update_val},
    ).fetchone()

    vis_scripts = []
    visualizations = []
    context = {'style': 'width:400px; height:225px;'}
    for idx, column in enumerate(columns_to_view):
        # Skip primary keys (no point to represent any of them)
        if column.is_key:
            continue

        # Add the title and surrounding container
        visualizations.append('<h4>' + column.name + '</h4>')

        # If all values are empty, no need to proceed
        if all(not col for col in df[column.name]):
            visualizations.append(
                '<p>' + _('No values in this column') + '</p><hr/>')
            continue

        if row[column.name] is None or row[column.name] == '':
            # NOTE(review): unlike the empty-column case above there is no
            # 'continue' here, so the visualisation is still drawn after
            # this warning -- confirm this is intentional.
            visualizations.append(
                '<p class="alert-warning">'
                + _('No value for this student in this column')
                + '</p>',
            )

        visualizations.append('<div style="display: inline-flex;">')

        col_viz = _get_column_visualisations(
            column,
            df[[column.name]],
            vis_scripts=vis_scripts,
            viz_id='column_{0}'.format(idx),
            single_val=row[column.name],
            context=context)

        visualizations.extend([viz.html_content for viz in col_viz])
        visualizations.append('</div><hr/>')

    return render(
        request,
        'table/stat_row.html',
        {
            'value': update_val,
            'vis_scripts': vis_scripts,
            'visualizations': visualizations})
def random_column_add(
    request: HttpRequest,
    workflow: Optional[Workflow] = None,
) -> JsonResponse:
    """Create a column with random values (Modal).

    The form field column_values is either an integer N (values drawn from
    1..N) or a comma-separated list of strings; the rows are split into
    random partitions, one per value.

    :param request: HTTP request with the form data
    :param workflow: Workflow object (injected by the view decorator)
    :return: JSON with the modal form or a redirect
    """
    # Get the workflow element
    if workflow.nrows == 0:
        messages.error(
            request,
            _('Cannot add column to a workflow without data'))
        return JsonResponse({'html_redirect': ''})

    # Form to read/process data
    form = RandomColumnAddForm(data=request.POST or None)

    if request.method == 'POST' and form.is_valid():
        # Save the column object attached to the form and add additional
        # fields
        column = form.save(commit=False)
        column.workflow = workflow
        column.is_key = False

        # Save the instance
        try:
            column = form.save()
            form.save_m2m()
        except IntegrityError:
            form.add_error('name', _('A column with that name already exists'))
            return JsonResponse({
                'html_form': render_to_string(
                    'workflow/includes/partial_random_column_add.html',
                    {'form': form},
                    request=request),
            })

        # Update the data frame
        df = load_table(workflow.get_data_frame_table_name())

        # Get the values and interpret its meaning
        column_values = form.cleaned_data['column_values']

        # First, try to see if the field is a valid integer
        try:
            int_value = int(column_values)
        except ValueError:
            int_value = None

        if int_value:
            # At this point the field is an integer.
            # NOTE(review): add_error targets a field named 'values' while
            # the form field appears to be 'column_values' -- Django
            # raises ValueError for an unknown field name; confirm the
            # form actually declares 'values'.
            if int_value <= 1:
                form.add_error(
                    'values',
                    _('The integer value has to be larger than 1'))
                return JsonResponse({
                    'html_form': render_to_string(
                        'workflow/includes/partial_random_column_add.html',
                        {'form': form},
                        request=request),
                })
            intvals = [idx + 1 for idx in range(int_value)]
        else:
            # At this point the field is a string and the values are the
            # comma separated strings.
            intvals = [
                valstr.strip()
                for valstr in column_values.strip().split(',') if valstr
            ]
            if not intvals:
                form.add_error(
                    'values',
                    _('The value has to be a comma-separated list'),
                )
                return JsonResponse({
                    'html_form': render_to_string(
                        'workflow/includes/partial_random_column_add.html',
                        {'form': form},
                        request=request),
                })

        # Empty new column
        new_column = [None] * workflow.nrows

        # Create the random partitions
        partitions = _partition(
            [idx for idx in range(workflow.nrows)],
            len(intvals))

        # Assign values to partitions
        for idx, indexes in enumerate(partitions):
            for col_idx in indexes:
                new_column[col_idx] = intvals[idx]

        # Assign the new column to the data frame
        df[column.name] = new_column

        # Populate the column type
        column.data_type = pandas_datatype_names.get(
            df[column.name].dtype.name,
        )

        # Update the positions of the appropriate columns
        workflow.reposition_columns(workflow.ncols + 1, column.position)

        column.save()
        workflow.refresh_from_db()

        # Store the df to DB
        try:
            store_dataframe(df, workflow)
        except Exception as exc:
            messages.error(
                request,
                _('Unable to add the column: {0}').format(str(exc)))
            return JsonResponse({'html_redirect': ''})

        # Log the event
        Log.objects.register(
            request.user,
            Log.COLUMN_ADD_RANDOM,
            workflow,
            {
                'id': workflow.id,
                'name': workflow.name,
                'column_name': column.name,
                'column_type': column.data_type,
                'value': column_values
            })

        # The form has been successfully processed
        return JsonResponse({'html_redirect': ''})

    return JsonResponse({
        'html_form': render_to_string(
            'workflow/includes/partial_random_column_add.html',
            {'form': form},
            request=request),
    })
def formula_column_add(
    request: HttpRequest,
    workflow: Optional[Workflow] = None,
) -> JsonResponse:
    """Add a formula column.

    Create a column whose values are computed by applying an operation
    (op_type) over a set of existing columns selected in the form.

    :param request: HTTP request with the form data
    :param workflow: Workflow object (injected by the view decorator)
    :return: JSON with the modal form or a redirect
    """
    # Cannot add a column to a workflow without rows
    if workflow.nrows == 0:
        messages.error(
            request,
            _('Cannot add column to a workflow without data'),
        )
        return JsonResponse({'html_redirect': ''})

    # Form to read/process data
    form = FormulaColumnAddForm(
        form_data=request.POST or None,
        operands=_formula_column_operands,
        columns=workflow.columns.all(),
    )

    if request.method == 'POST' and form.is_valid():
        # Save the column object attached to the form and add additional
        # fields
        column = form.save(commit=False)
        column.workflow = workflow
        column.is_key = False

        # Save the instance
        try:
            column.save()
            form.save_m2m()
        except IntegrityError:
            form.add_error('name', _('A column with that name already exists'))
            return JsonResponse({
                'html_form': render_to_string(
                    'workflow/includes/partial_formula_column_add.html',
                    {'form': form},
                    request=request),
            })

        # Update the data frame
        df = load_table(workflow.get_data_frame_table_name())

        try:
            # Add the column with the appropriate computation
            operation = form.cleaned_data['op_type']
            cnames = [col.name for col in form.selected_columns]
            df[column.name] = _op_distrib[operation](df[cnames])
        except Exception as exc:
            # Something went wrong in pandas, we need to remove the column
            column.delete()

            # Notify in the form
            form.add_error(
                None,
                _('Unable to add the column: {0}').format(exc),
            )
            return JsonResponse({
                'html_form': render_to_string(
                    'workflow/includes/partial_formula_column_add.html',
                    {'form': form},
                    request=request,
                ),
            })

        # Populate the column type
        column.data_type = pandas_datatype_names.get(
            df[column.name].dtype.name)

        # Update the positions of the appropriate columns
        workflow.reposition_columns(workflow.ncols + 1, column.position)

        # Save column and refresh the prefetched related in the workflow
        column.save()
        workflow.refresh_from_db()

        # Store the df to DB
        try:
            store_dataframe(df, workflow)
        except Exception as exc:
            # Fix: message previously read 'Unable to clone column', a
            # copy/paste remnant -- this view adds a column, matching the
            # wording used elsewhere in this module.
            messages.error(
                request,
                _('Unable to add the column: {0}').format(str(exc)))
            return JsonResponse({'html_redirect': ''})

        # Log the event
        Log.objects.register(
            request.user,
            Log.COLUMN_ADD_FORMULA,
            workflow,
            {
                'id': workflow.id,
                'name': workflow.name,
                'column_name': column.name,
                'column_type': column.data_type
            })

        # The form has been successfully processed
        return JsonResponse({'html_redirect': ''})

    return JsonResponse({
        'html_form': render_to_string(
            'workflow/includes/partial_formula_column_add.html',
            {'form': form},
            request=request,
        ),
    })