Beispiel #1
0
    def test_table_pandas_merge_to_outer_NaN(self):
        """Outer merge via the pandas API matches a locally computed merge."""
        # Fixture contains a single workflow
        workflow = Workflow.objects.all()[0]

        # Demote 'age' and 'email' from key columns so the merge may alter them
        for col_name in ('age', 'email'):
            col = workflow.columns.filter(name=col_name)[0]
            col.is_key = False
            col.save()

        # Drop the column with booleans because the data type is lost
        workflow_delete_column(workflow,
                               workflow.columns.get(name='registered'))

        # Source frame that will be merged in
        src_frame = pd.DataFrame(self.src_df2)

        # Compute the expected result locally with pandas
        current = load_table(workflow.get_data_frame_table_name())
        expected = pd.merge(
            current, src_frame, how="outer", left_on="sid", right_on="sid")

        # Push the same merge through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": df_to_string(src_frame),
                "how": "outer",
                "left_on": "sid",
                "right_on": "sid"
            },
            format='json')

        # Reload the workflow after the operation
        workflow = Workflow.objects.all()[0]

        # Outer merge must yield four rows and eight columns
        self.assertEqual(workflow.nrows, 4)
        self.assertEqual(workflow.ncols, 8)

        # Stored table must match the locally computed merge
        result = load_table(workflow.get_data_frame_table_name())
        self.compare_tables(result, expected)

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Beispiel #2
0
    def test_table_pandas_update(self):
        """Replace the workflow table through the pandas API and verify it."""
        workflow = Workflow.objects.all()[0]

        # Build the replacement table and normalize its datetime columns
        uploaded = detect_datetime_columns(pd.DataFrame(self.new_table))

        # PUT the serialized table through the API
        response = self.client.put(
            reverse('table:api_pops', kwargs={'wid': workflow.id}),
            {'data_frame': df_to_string(uploaded)},
            format='json')

        # Reload the workflow, which has just been modified
        workflow = Workflow.objects.get(id=workflow.id)

        # The stored table must be identical to the uploaded one
        stored = load_table(workflow.get_data_frame_table_name())
        self.compare_tables(uploaded, stored)

        # Workflow metadata must remain consistent with the data frame
        workflow = Workflow.objects.get(id=workflow.id)
        self.assertTrue(check_wf_df(workflow))
Beispiel #3
0
    def test_table_JSON_merge_to_left(self):
        """Left merge through the JSON API keeps the three initial rows."""
        workflow = Workflow.objects.all()[0]

        # Demote 'age' and 'email' from key columns
        for col_name in ('age', 'email'):
            col = workflow.columns.filter(name=col_name)[0]
            col.is_key = False
            col.save()

        # Request the merge through the API
        response = self.client.put(
            reverse('table:api_merge', kwargs={'wid': workflow.id}),
            {
                "src_df": self.src_df,
                "how": "left",
                "left_on": "sid",
                "right_on": "sid"
            },
            format='json')

        # Reload the workflow after the merge
        workflow = Workflow.objects.all()[0]

        # Left merge keeps the three rows of the initial data frame
        self.assertEqual(workflow.nrows, 3)

        # The new column for sid == 1 must carry the source value
        df = load_table(workflow.get_data_frame_table_name())
        self.assertEqual(df[df['sid'] == 1]['newcol'].values[0],
                         self.src_df['newcol'][0])

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Beispiel #4
0
def workflow_restrict_column(column: Column) -> Optional[str]:
    """Set category of the column to the existing set of values.

    Restrict the given column so that only the values currently present in
    the table are accepted in future updates.

    :param column: Column object to restrict

    :return: Error message as a string, or None on success
    """
    # Collect the distinct non-null values currently stored in the column
    table = load_table(column.workflow.get_data_frame_table_name())
    observed = set(table[column.name].dropna())

    if not observed:
        # Nothing to restrict to: the column holds no usable values
        return _('Column has no meaningful values')

    # Record the observed values as the column's category set
    column.set_categories(list(observed))
    column.save()

    # Query-builder operands depend on column metadata; refresh them
    column.workflow.set_query_builder_ops()
    column.workflow.save()

    # Correct execution
    return None
Beispiel #5
0
    def test_table_pandas_merge_to_left(self):
        """Left merge via the pandas API keeps the initial three rows."""
        workflow = Workflow.objects.all()[0]

        # Serialize the source table for the API call
        src_frame = pd.DataFrame(self.src_df)

        # Request the merge through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": df_to_string(src_frame),
                "how": "left",
                "left_on": "sid",
                "right_on": "sid"
            },
            format='json')

        # Reload the workflow after the merge
        workflow = Workflow.objects.all()[0]

        # Left merge keeps the three rows of the initial data frame
        self.assertEqual(workflow.nrows, 3)

        # The new column for sid == 1 must carry the source value
        df = load_table(workflow.get_data_frame_table_name())
        self.assertEqual(df[df['sid'] == 1]['newcol'].values[0],
                         self.src_df['newcol'][0])

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Beispiel #6
0
    def test_table_pandas_create(self):
        """Create a workflow and upload its first table through the API."""
        # Create a second workflow through the API
        response = self.client.post(
            reverse('workflow:api_workflows'),
            {
                'name': test.wflow_name + '2',
                'attributes': {'one': 'two'}
            },
            format='json')

        # Fetch the workflow just created
        workflow = Workflow.objects.get(id=response.data['id'])

        # Build the table to upload and normalize its datetime columns
        uploaded = detect_datetime_columns(pd.DataFrame(self.new_table))

        # POST the serialized table to the new workflow
        response = self.client.post(
            reverse('table:api_pops', kwargs={'wid': workflow.id}),
            {'data_frame': df_to_string(uploaded)},
            format='json')

        # Reload the workflow, which has just been modified
        workflow = Workflow.objects.get(id=workflow.id)

        # The stored table must equal the uploaded one
        stored = load_table(workflow.get_data_frame_table_name())
        self.compare_tables(uploaded, stored)

        # Workflow metadata must remain consistent
        self.assertTrue(check_wf_df(workflow))
Beispiel #7
0
    def put(
        self,
        request: HttpRequest,
        wid: int,
        format=None,
        workflow: Optional[Workflow] = None,
    ) -> HttpResponse:
        """Process the PUT request: merge a source data frame into the table.

        :param request: HTTP request carrying the serialized merge parameters

        :param wid: Workflow id (consumed by the permission layer)

        :param format: Format of the request (unused)

        :param workflow: Workflow object provided by the permission layer

        :return: Response with the serialized data, or raises APIException
        """
        # Get the dst_df
        dst_df = load_table(workflow.get_data_frame_table_name())

        serializer = self.serializer_class(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        # Check that the parameters are correct. The empty string is already
        # rejected by the membership test, so no separate check is needed.
        how = serializer.validated_data['how']
        if how not in ('left', 'right', 'outer', 'inner'):
            raise APIException(
                _('how must be one of left, right, outer or inner'))

        # The destination join column must be a unique key
        left_on = serializer.validated_data['left_on']
        if not is_unique_column(dst_df[left_on]):
            raise APIException(
                _('column {0} does not contain a unique key.').format(left_on))

        # Operation has been accepted by the serializer
        src_df = serializer.validated_data['src_df']

        # The source join column must exist and also be a unique key
        right_on = serializer.validated_data['right_on']
        if right_on not in list(src_df.columns):
            raise APIException(
                _('column {0} not found in data frame').format(right_on))

        if not is_unique_column(src_df[right_on]):
            raise APIException(
                _('column {0} does not contain a unique key.').format(
                    right_on))

        merge_info = {
            'how_merge': how,
            'dst_selected_key': left_on,
            'src_selected_key': right_on,
            'initial_column_names': list(src_df.columns),
            'rename_column_names': list(src_df.columns),
            'columns_to_upload': [True] * len(list(src_df.columns)),
        }

        # Ready to perform the MERGE
        try:
            perform_dataframe_upload_merge(workflow, dst_df, src_df,
                                           merge_info)
        except Exception as exc:
            # Chain the original exception to preserve the failure context
            raise APIException(
                _('Unable to perform merge operation: {0}').format(
                    str(exc))) from exc

        # Merge went through.
        return Response(serializer.data, status=status.HTTP_201_CREATED)
Beispiel #8
0
 def get(
     self,
     request: HttpRequest,
     wid: int,
     format=None,
     workflow: Optional[Workflow] = None,
 ) -> HttpResponse:
     """Serve the workflow's stored data frame in the response."""
     # Serialize the table read from the database and return it
     table = load_table(workflow.get_data_frame_table_name())
     payload = self.serializer_class({'data_frame': table})
     return Response(payload.data)
Beispiel #9
0
def column_restrict_values(
    request: HttpRequest,
    pk: int,
    workflow: Optional[Workflow] = None,
    column: Optional[Column] = None,
) -> JsonResponse:
    """Restrict future values in this column to one of those already present.

    :param request: HTTP request

    :param pk: ID of the column to restrict. The workflow element is taken
     from the session.

    :param workflow: Workflow object (provided by the permission layer)

    :param column: Column object corresponding to pk (provided by decorator)

    :return: JSON with either the rendered form or a redirect
    """
    # If the columns is unique and it is the only one, we cannot allow
    # the operation
    if column.is_key:
        # This is the only key column
        messages.error(request, _('You cannot restrict a key column'))
        return JsonResponse({'html_redirect': reverse('workflow:detail')})

    # Get the name of the column to delete
    context = {'pk': pk, 'cname': column.name}

    # Get the values from the data frame
    # NOTE(review): the join assumes the column values are strings; a numeric
    # column would raise TypeError here — confirm upstream guarantees this.
    df = load_table(workflow.get_data_frame_table_name())
    context['values'] = ', '.join(set(df[column.name]))

    if request.method == 'POST':
        # Proceed restricting the column
        error_txt = workflow_restrict_column(column)

        if isinstance(error_txt, str):
            # Something went wrong. Show it
            messages.error(request, error_txt)

            # Log the event
            # NOTE(review): the event is registered only on the error path;
            # presumably a successful restriction should be logged as well —
            # confirm the intended behavior.
            Log.objects.register(
                request.user, Log.COLUMN_RESTRICT, workflow, {
                    'id': workflow.id,
                    'name': workflow.name,
                    'column_name': column.name,
                    'values': context['values']
                })

        return JsonResponse({'html_redirect': reverse('workflow:detail')})

    # GET: render the confirmation form with the collected values
    return JsonResponse({
        'html_form':
        render_to_string('workflow/includes/partial_column_restrict.html',
                         context,
                         request=request),
    })
Beispiel #10
0
    def do_sql_txt_operand(self,
                           input_value,
                           op_value,
                           type_value,
                           value,
                           row_yes=1,
                           row_no=1):
        """Check a SQL text operand in its plain and negated forms.

        Builds the formula skeleton, loads the filtered table and asserts
        the expected number of matching rows; when the operator template
        carries a '{0}' slot, the check is repeated with the negated form.
        """
        # Plain form of the operator
        self.set_skel(input_value, op_value.format(''), type_value, value,
                      'v_' + type_value)
        matched = load_table(self.test_table, self.test_columns, self.skel)
        self.assertEqual(matched.shape[0], row_yes)
        evaluate_formula(self.skel, EVAL_TXT)

        # Negated form, only when the operator template supports it
        if '{0}' in op_value:
            self.set_skel(input_value, op_value.format('not_'), type_value,
                          value, 'v_' + type_value)
            matched = load_table(self.test_table, self.test_columns,
                                 self.skel)
            self.assertEqual(matched.shape[0], row_no)
            evaluate_formula(self.skel, EVAL_TXT)
Beispiel #11
0
    def test_df_equivalent_after_sql(self):
        """A data frame survives a store/load round trip through the DB."""
        # Build the source frame from the CSV fixture
        df_source = load_df_from_csvfile(io.StringIO(self.csv1), 0, 0)

        # Write it to the database and read it back
        store_table(df_source, self.table_name)
        df_dst = load_table(self.table_name)

        # Normalize the timezone of the datetime columns before comparing
        for col in ('date1', 'date2'):
            df_dst[col] = df_dst[col].dt.tz_convert('Australia/Adelaide')

        # Both frames must be identical
        assert df_source.equals(df_dst)
Beispiel #12
0
 def post(
     self,
     request: HttpRequest,
     wid: int,
     format=None,
     workflow: Optional[Workflow] = None,
 ) -> HttpResponse:
     """Create the data frame; reject workflows that already hold a table."""
     existing = load_table(workflow.get_data_frame_table_name())
     if existing is not None:
         raise APIException(
             _('Post request requires workflow without a table'))
     # Delegate the actual upload to the shared override handler
     return self.override(request,
                          wid=wid,
                          format=format,
                          workflow=workflow)
Beispiel #13
0
    def test_column_clone(self):
        """Clone a column and verify the copy holds identical data."""
        source_column = self.workflow.columns.get(name='Q01')

        # GET renders the clone form
        response = self.get_response('workflow:column_clone',
                                     {'pk': source_column.id},
                                     is_ajax=True)
        self.assertTrue(status.is_success(response.status_code))

        # POST performs the clone
        response = self.get_response('workflow:column_clone',
                                     {'pk': source_column.id},
                                     method='POST',
                                     is_ajax=True)
        self.assertTrue(status.is_success(response.status_code))

        # The cloned column must be equal to the original one
        table = load_table(self.workflow.get_data_frame_table_name())
        self.assertTrue(table['Copy of Q01'].equals(table['Q01']))
Beispiel #14
0
    def test_table_pandas_get(self):
        """GET returns the same table that is stored in the database."""
        workflow = Workflow.objects.all()[0]

        # Fetch the serialized table through the API
        response = self.client.get(
            reverse('table:api_pops', kwargs={'wid': workflow.id}))
        api_frame = string_to_df(response.data['data_frame'])

        # Read the same table directly from the database
        db_frame = load_table(workflow.get_data_frame_table_name())

        # Both frames must agree
        self.compare_tables(api_frame, db_frame)
Beispiel #15
0
    def parse_data_frames(self):
        """Return the (destination, source) data frames for merge tests.

        The destination frame comes from the workflow table when one is
        attached, otherwise from the first CSV fixture; the source frame
        always comes from the second CSV fixture. The column-related
        merge_info fields are refreshed to match the source frame.
        """
        if self.workflow:
            # Get the workflow data frame
            df_dst = load_table(self.workflow.get_data_frame_table_name())
        else:
            df_dst = load_df_from_csvfile(io.StringIO(self.csv1), 0, 0)

        df_src = load_df_from_csvfile(io.StringIO(self.csv2), 0, 0)

        # Fix the merge_info fields.
        self.merge_info['initial_column_names'] = list(df_src.columns)
        self.merge_info['rename_column_names'] = list(df_src.columns)
        # columns_to_upload is a list of booleans (one flag per source
        # column), matching how merge_info is built elsewhere in the code.
        self.merge_info['columns_to_upload'] = [True] * len(df_src.columns)

        return df_dst, df_src
Beispiel #16
0
    def test_04_merge_right(self):
        """Right merge brings in all the new columns from the source."""
        self.template_merge('right', rename=False)

        # Assert the content of the dataframe
        wflow = Workflow.objects.get(name=self.wf_name)
        df = load_table(wflow.get_data_frame_table_name())

        # Every expected column must be present after the merge
        present = set(df.columns)
        for expected in ('key', 'text3', 'double3', 'bool3', 'date3'):
            self.assertTrue(expected in present)

        assert check_wf_df(Workflow.objects.get(name='Testing Merge'))

        # End of session
        self.logout()
Beispiel #17
0
    def test_merge_inner(self):
        """Inner merge of the fixture frames completes without error."""
        # Get the workflow
        self.workflow = Workflow.objects.all()[0]

        # Parse the source data frame
        df_dst, df_src = self.parse_data_frames()

        self.merge_info['how_merge'] = 'inner'

        result = perform_dataframe_upload_merge(self.workflow, df_dst, df_src,
                                                self.merge_info)

        # Load again the workflow data frame
        df_dst = load_table(self.workflow.get_data_frame_table_name())

        # Result must be correct (None). assertEquals is a deprecated alias,
        # and assertIsNone states the expectation precisely.
        self.assertIsNone(result)
Beispiel #18
0
    def test_tracking(self):
        """Test that tracking hits are properly stored."""
        # Two passes verify that hit counts accumulate across rounds
        for expected_count in range(1, 3):
            # Fire one tracking request per token
            for token in self.trck_tokens:
                self.client.get(reverse('trck') + '?v=' + token)

            # Fetch the workflow and its data frame
            workflow = Workflow.objects.get(name=self.wflow_name)
            df = load_table(workflow.get_data_frame_table_name())

            # Every student row must show the accumulated hit count
            student_emails = [
                x[1] for x in test.user_info if x[1].startswith('student')
            ]
            for uemail in student_emails:
                read_count = int(
                    df.loc[df['email'] == uemail, 'EmailRead_1'].values[0])
                self.assertEqual(read_count, expected_count)
Beispiel #19
0
    def test_table_pandas_merge_get(self):
        """GET on the merge endpoint returns the stored data frame."""
        workflow = Workflow.objects.all()[0]

        # Fetch the merge data through the API
        response = self.client.get(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}))

        workflow = Workflow.objects.all()[0]

        # Deserialize the frame returned by the API
        api_frame = string_to_df(response.data['src_df'])

        # Read the same table directly from the database
        db_frame = load_table(workflow.get_data_frame_table_name())

        # Frames must agree and the workflow must stay consistent
        self.compare_tables(api_frame, db_frame)
        self.assertTrue(check_wf_df(workflow))
def forwards(apps, schema_editor):
    """Align Column.position with the column order of the stored data frame.

    Data migration: for every workflow that has a table in the database,
    set each Column's position to its 1-based index in the data frame.
    """
    if schema_editor.connection.alias != 'default':
        # Only run the migration against the default database
        return
    # Traverse the workflows and verify that the columns are in the same
    # order than the columns in the workflow
    Workflow = apps.get_model('workflow', 'Workflow')

    for w in Workflow.objects.all():
        # NOTE(review): is_table_in_db is called with the workflow itself as
        # argument — confirm this matches the historical method signature.
        if not w.is_table_in_db(w):
            continue

        # NOTE(review): load_table receives the workflow id here; presumably
        # the historical version resolved the table from the id — verify.
        df = load_table(w.id)

        if len(df.columns) != w.ncols:
            raise Exception('Inconsistent number of columns')

        # Positions are 1-based and follow the data frame column order
        df_columns = list(df.columns)
        for c in w.columns.all():
            c.position = df_columns.index(c.name) + 1
            c.save()
Beispiel #21
0
 def get(
     self,
     request: HttpRequest,
     wid: int,
     format=None,
     workflow: Optional[Workflow] = None,
 ) -> HttpResponse:
     """Return the current table as the source frame of a blank merge."""
     # Serialize the stored table; the merge parameters are left empty
     payload = {
         'src_df': load_table(workflow.get_data_frame_table_name()),
         'how': '',
         'left_on': '',
         'right_on': '',
     }
     serializer = self.serializer_class(payload)
     return Response(serializer.data)
Beispiel #22
0
    def test_random_column_add(self):
        """Test adding a random column."""
        # GET renders the creation form
        response = self.get_response('workflow:random_column_add',
                                     is_ajax=True)
        self.assertTrue(status.is_success(response.status_code))

        # POST creates the column with random values drawn up to 12
        response = self.get_response('workflow:random_column_add',
                                     method='POST',
                                     req_params={
                                         'name': 'RANDOM COLUMN',
                                         'description_text': 'RANDOM COLUMN DESC',
                                         'data_type': 'integer',
                                         'position': '0',
                                         'column_values': '12'
                                     },
                                     is_ajax=True)
        self.assertTrue(status.is_success(response.status_code))

        # Every generated value must fall strictly between 0 and 13
        table = load_table(self.workflow.get_data_frame_table_name())
        self.assertTrue(all(0 < num < 13 for num in table['RANDOM COLUMN']))
Beispiel #23
0
    def test_formula_column_add(self):
        """Test adding a formula column."""
        # GET renders the creation form
        response = self.get_response('workflow:formula_column_add',
                                     is_ajax=True)
        self.assertTrue(status.is_success(response.status_code))

        # POST creates a column holding the sum of Q01 and Q02
        response = self.get_response('workflow:formula_column_add',
                                     method='POST',
                                     req_params={
                                         'name': 'FORMULA COLUMN',
                                         'description_text': 'FORMULA COLUMN DESC',
                                         'data_type': 'integer',
                                         'position': '0',
                                         'columns': ['12', '13'],
                                         'op_type': 'sum'
                                     },
                                     is_ajax=True)
        self.assertTrue(status.is_success(response.status_code))

        # The formula column must equal the sum of its operand columns
        table = load_table(self.workflow.get_data_frame_table_name())
        self.assertTrue(
            table['FORMULA COLUMN'].equals(table['Q01'] + table['Q02']))
Beispiel #24
0
def _get_column_visualization_items(
    workflow,
    column,
    max_width=VISUALIZATION_WIDTH,
    max_height=VISUALIZATION_HEIGHT,
):
    """Get the visualization items (scripts and HTML) for a column.

    :param workflow: Workflow being processed

    :param column: Column requested

    :param max_width: Maximum width attribute in pixels

    :param max_height: Maximum height attribute in pixels

    :return: Tuple stat_data with descriptive stats, visualization scripts and
    visualization HTML
    """
    # Get the dataframe
    df = load_table(workflow.get_data_frame_table_name())

    # Extract the data to show at the top of the page
    stat_data = get_column_statistics(df[column.name])

    # Build the visualisations restricted to this single column; the style
    # caps the rendered size and keeps the block inline
    vis_scripts = []
    visualizations = _get_column_visualisations(
        column,
        df[[column.name]],
        vis_scripts,
        context={
            'style': 'max-width:{0}px; max-height:{1}px;'.format(
                max_width,
                max_height,
            ) + 'display:inline-block;',
        },
    )

    return stat_data, vis_scripts, visualizations
Beispiel #25
0
    def test_05_merge_outer_fail(self):
        """An outer merge that loses all key columns must be rejected."""
        self.template_merge('outer')

        # The page must report the failed merge
        self.assertIn(
            'Merge operation produced a result without any key columns.',
            self.selenium.page_source
        )

        # Assert the content of the dataframe
        wflow = Workflow.objects.get(name=self.wf_name)
        df = load_table(wflow.get_data_frame_table_name())
        columns = set(df.columns)

        # Only the original key column survives; nothing new was merged in
        self.assertTrue('key' in columns)
        for rejected in ('key2', 'text3', 'double3', 'bool3', 'date3'):
            self.assertTrue(rejected not in columns)

        assert check_wf_df(Workflow.objects.get(name='Testing Merge'))

        # End of session
        self.logout()
Beispiel #26
0
def run_plugin(
    workflow,
    plugin_info,
    input_column_names,
    output_column_names,
    output_suffix,
    merge_key,
    plugin_params,
):
    """
    Execute the run method in the plugin.

    Execute the run method in a plugin with the dataframe from the given
    workflow

    :param workflow: Workflow object being processed

    :param plugin_info: PluginRegistry object being processed

    :param input_column_names: List of input column names

    :param output_column_names: List of output column names

    :param output_suffix: Suffix that is added to the output column names

    :param merge_key: Key column to use in the merge

    :param plugin_params: Dictionary with the parameters to execute the plug in

    :return: True on success; raises Exception on any failure
    """
    plugin_instance, msgs = load_plugin(plugin_info.filename)
    if plugin_instance is None:
        raise Exception(
            ugettext('Unable to instantiate plugin "{0}"').format(
                plugin_info.name), )

    # Check that the list of given inputs is consistent: if plugin has a list
    # of inputs, it has to have the same length as the given list.
    if (plugin_instance.get_input_column_names()
            and len(plugin_instance.get_input_column_names()) !=
            len(input_column_names)):
        raise Exception(
            ugettext(
                'Inconsistent number of inputs when invoking plugin "{0}"',
            ).format(plugin_info.name), )

    # Check that the list of given outputs has the same length as the list of
    # outputs proposed by the plugin
    if (plugin_instance.get_output_column_names()
            and len(plugin_instance.get_output_column_names()) !=
            len(output_column_names)):
        raise Exception(
            ugettext(
                'Inconsistent number of outputs when invoking plugin "{0}"',
            ).format(plugin_info.name), )

    # Get the data frame from the workflow
    try:
        df = load_table(workflow.get_data_frame_table_name())
    except Exception as exc:
        # Chain the original exception to preserve the failure context
        raise Exception(
            ugettext(
                'Exception when retrieving the data frame from workflow: {0}',
            ).format(str(exc)), ) from exc

    # Set the updated names of the input, output columns, and the suffix
    if not plugin_instance.get_input_column_names():
        plugin_instance.input_column_names = input_column_names
    plugin_instance.output_column_names = output_column_names
    plugin_instance.output_suffix = output_suffix

    # Create a new dataframe with the given input columns, and rename them if
    # needed
    try:
        sub_df = pd.DataFrame(df[input_column_names])
        if plugin_instance.get_input_column_names():
            sub_df.columns = plugin_instance.get_input_column_names()
    except Exception as exc:
        raise Exception(
            ugettext('Error when creating data frame for plugin: {0}').format(
                str(exc))) from exc

    # Try the execution and catch any exception
    try:
        new_df = plugin_instance.run(sub_df, parameters=plugin_params)
    except Exception as exc:
        raise Exception(
            ugettext('Error while executing plugin: {0}').format(
                str(exc)), ) from exc

    # If plugin does not return a data frame, flag as error
    if not isinstance(new_df, pd.DataFrame):
        raise Exception(
            ugettext(
                'Plugin executed but did not return a pandas data frame.'), )

    # Execution is DONE. Now we have to perform various additional checks

    # Result has to have the exact same number of rows
    if new_df.shape[0] != df.shape[0]:
        raise Exception(
            ugettext('Incorrect number of rows ({0}) in result data frame.',
                     ).format(new_df.shape[0]), )

    # Merge key name cannot be part of the output df. Translate first, then
    # format, so the untranslated template is used as the catalog key.
    if merge_key in new_df.columns:
        raise Exception(
            ugettext(
                'Column name {0} cannot be in the result data frame.',
            ).format(merge_key), )

    # Result column names are consistent
    if set(new_df.columns) != set(plugin_instance.get_output_column_names()):
        raise Exception(ugettext('Incorrect columns in result data frame.'))

    # Add the merge column to the result df
    new_df[merge_key] = df[merge_key]

    # Proceed with the merge
    try:
        new_frame = perform_dataframe_upload_merge(
            workflow,
            df,
            new_df,
            {
                'how_merge': 'inner',
                'dst_selected_key': merge_key,
                'src_selected_key': merge_key,
                'initial_column_names': list(new_df.columns),
                'rename_column_names': list(new_df.columns),
                'columns_to_upload': [True] * len(list(new_df.columns)),
            },
        )
    except Exception as exc:
        raise Exception(
            ugettext('Error while merging result: {0}.').format(
                str(exc)), ) from exc

    if isinstance(new_frame, str):
        raise Exception(
            ugettext('Error while merging result: {0}.').format(new_frame))

    # Update execution time in the plugin
    plugin_info.executed = datetime.now(
        pytz.timezone(ontask_settings.TIME_ZONE), )
    plugin_info.save()

    return True
Beispiel #27
0
def stat_table_view(
    request: HttpRequest,
    pk: Optional[int] = None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Render the page with stats and visualizations for a view.

    :param request: HTTP request

    :param pk: View id to use

    :param workflow: Workflow object (provided by the permission layer)

    :return: Render the page
    """
    # If the workflow has no data, something went wrong, go back to the
    # workflow details page
    if workflow.nrows == 0:
        messages.error(
            request,
            _('Unable to provide visualisation without data.'))
        # Fixed URL name typo: 'worflow:detail' -> 'workflow:detail'
        return redirect('workflow:detail', workflow.id)

    # If a view is given, filter the columns
    view = None
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found. Redirect to workflow detail
            return redirect('home')
        columns_to_view = view.columns.filter(is_key=False)

        df = load_table(
            workflow.get_data_frame_table_name(),
            [col.name for col in columns_to_view],
            view.formula)
    else:
        # No view given, fetch the entire data frame
        columns_to_view = workflow.columns.filter(is_key=False)
        df = load_table(workflow.get_data_frame_table_name())

    vis_scripts = []
    visualizations = []
    context = {'style': 'width:400px; height:225px;'}
    # enumerate supplies the per-column index used to build unique viz ids
    for idx, column in enumerate(columns_to_view):
        # Skip primary keys (no point to represent any of them)
        if column.is_key:
            continue

        # Add the title and surrounding container
        visualizations.append('<h4>' + column.name + '</h4>')
        # If all values are empty, no need to proceed
        if all(not col_data for col_data in df[column.name]):
            visualizations.append('<p>No values in this column</p><hr/>')
            continue

        visualizations.append('<div style="display: inline-flex;">')

        column_viz = _get_column_visualisations(
            column,
            df[[column.name]],
            vis_scripts=vis_scripts,
            viz_id='column_{0}'.format(idx),
            context=context)

        visualizations.extend([vis.html_content for vis in column_viz])
        visualizations.append('</div><hr/>')

    return render(
        request,
        'table/stat_view.html',
        {'view': view,
         'vis_scripts': vis_scripts,
         'visualizations': visualizations})
Beispiel #28
0
def stat_row_view(
    request: HttpRequest,
    pk: Optional[int] = None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Render stats and visualizations for a single row of the table.

    The row is selected with the key/value pair given as GET parameters
    (``key`` and ``val``). If a view id is given, only the columns in that
    view (and the view's filter formula) are considered; otherwise the whole
    data frame is used. One visualization is produced per non-key column.

    :param request: HTTP request; must carry GET parameters ``key`` and ``val``

    :param pk: Optional view id used to restrict the columns

    :param workflow: Workflow being considered (injected by a decorator)

    :return: Rendered page with the row visualizations
    """
    # Get the pair key, value to fetch the row from the table
    update_key = request.GET.get('key')
    update_val = request.GET.get('val')

    if not update_key or not update_val:
        # Malformed request: both parameters are required to locate the row
        return render(
            request,
            'error.html',
            {'message': _('Unable to visualize table row')})

    # If a view is given, filter the columns
    columns_to_view = workflow.columns.all()
    column_names = workflow.get_column_names()
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found. Redirect to home
            return redirect('home')
        columns_to_view = view.columns.all()
        column_names = [col.name for col in columns_to_view]

        df = load_table(
            workflow.get_data_frame_table_name(),
            column_names,
            view.formula)
    else:
        # No view given, fetch the entire data frame
        df = load_table(workflow.get_data_frame_table_name())

    # Get the row from the table
    row = get_rows(
        workflow.get_data_frame_table_name(),
        column_names=column_names,
        filter_pairs={update_key: update_val},
    ).fetchone()

    if row is None:
        # No row matches the key/value pair; without this guard the
        # row[column.name] accesses below raise TypeError.
        return render(
            request,
            'error.html',
            {'message': _('Unable to visualize table row')})

    vis_scripts = []
    visualizations = []
    context = {'style': 'width:400px; height:225px;'}
    for idx, column in enumerate(columns_to_view):

        # Skip primary keys (no point to represent any of them)
        if column.is_key:
            continue

        # Add the title and surrounding container
        visualizations.append('<h4>' + column.name + '</h4>')
        # If all values are empty, no need to proceed
        if all(not col for col in df[column.name]):
            visualizations.append(
                '<p>' + _('No values in this column') + '</p><hr/>')
            continue

        # Warn when this particular row has no value in the column
        if row[column.name] is None or row[column.name] == '':
            visualizations.append(
                '<p class="alert-warning">'
                + _('No value for this student in this column')
                + '</p>',
            )

        visualizations.append('<div style="display: inline-flex;">')

        col_viz = _get_column_visualisations(
            column,
            df[[column.name]],
            vis_scripts=vis_scripts,
            viz_id='column_{0}'.format(idx),
            single_val=row[column.name],
            context=context)

        visualizations.extend([viz.html_content for viz in col_viz])
        visualizations.append('</div><hr/>')

    return render(
        request,
        'table/stat_row.html',
        {
            'value': update_val,
            'vis_scripts': vis_scripts,
            'visualizations': visualizations})
Beispiel #29
0
def random_column_add(
    request: HttpRequest,
    workflow: Optional[Workflow] = None,
) -> JsonResponse:
    """Create a column with random values (Modal).

    The form field ``column_values`` is interpreted either as an integer N
    (values are drawn from 1..N) or as a comma-separated list of strings.
    The rows are split into random partitions of (roughly) equal size and
    each partition is assigned one of the values.

    :param request: HTTP request with the form data

    :param workflow: Workflow being considered (injected by a decorator)

    :return: JSON response with either the form HTML or a redirect
    """
    # Get the workflow element
    if workflow.nrows == 0:
        messages.error(request,
                       _('Cannot add column to a workflow without data'))
        return JsonResponse({'html_redirect': ''})

    # Form to read/process data
    form = RandomColumnAddForm(data=request.POST or None)

    if request.method == 'POST' and form.is_valid():

        # Save the column object attached to the form and add additional fields
        column = form.save(commit=False)
        column.workflow = workflow
        column.is_key = False

        # Save the instance
        try:
            column = form.save()
            form.save_m2m()
        except IntegrityError:
            form.add_error('name', _('A column with that name already exists'))
            return JsonResponse({
                'html_form':
                render_to_string(
                    'workflow/includes/partial_random_column_add.html',
                    {'form': form},
                    request=request),
            })

        # Update the data frame
        df = load_table(workflow.get_data_frame_table_name())

        # Get the values and interpret its meaning
        column_values = form.cleaned_data['column_values']
        # First, try to see if the field is a valid integer
        try:
            int_value = int(column_values)
        except ValueError:
            int_value = None

        # Compare against None: a plain truthiness test would send the
        # integer 0 down the comma-separated-string branch.
        if int_value is not None:
            # At this point the field is an integer
            if int_value <= 1:
                # Errors must be attached to the real field name
                # (column_values); an unknown name makes add_error raise.
                form.add_error(
                    'column_values',
                    _('The integer value has to be larger than 1'))
                return JsonResponse({
                    'html_form':
                    render_to_string(
                        'workflow/includes/partial_random_column_add.html',
                        {'form': form},
                        request=request),
                })

            intvals = [idx + 1 for idx in range(int_value)]
        else:
            # At this point the field is a string and the values are the comma
            # separated strings.
            intvals = [
                valstr.strip() for valstr in column_values.strip().split(',')
                if valstr
            ]
            if not intvals:
                form.add_error(
                    'column_values',
                    _('The value has to be a comma-separated list'),
                )
                return JsonResponse({
                    'html_form':
                    render_to_string(
                        'workflow/includes/partial_random_column_add.html',
                        {'form': form},
                        request=request),
                })

        # Empty new column
        new_column = [None] * workflow.nrows
        # Create the random partitions
        partitions = _partition([idx for idx in range(workflow.nrows)],
                                len(intvals))

        # Assign values to partitions
        for idx, indexes in enumerate(partitions):
            for col_idx in indexes:
                new_column[col_idx] = intvals[idx]

        # Assign the new column to the data frame
        df[column.name] = new_column

        # Populate the column type
        column.data_type = pandas_datatype_names.get(
            df[column.name].dtype.name, )

        # Update the positions of the appropriate columns
        workflow.reposition_columns(workflow.ncols + 1, column.position)

        column.save()
        workflow.refresh_from_db()

        # Store the df to DB
        try:
            store_dataframe(df, workflow)
        except Exception as exc:
            messages.error(request,
                           _('Unable to add the column: {0}').format(str(exc)))
            return JsonResponse({'html_redirect': ''})

        # Log the event
        Log.objects.register(
            request.user, Log.COLUMN_ADD_RANDOM, workflow, {
                'id': workflow.id,
                'name': workflow.name,
                'column_name': column.name,
                'column_type': column.data_type,
                'value': column_values
            })

        # The form has been successfully processed
        return JsonResponse({'html_redirect': ''})

    return JsonResponse({
        'html_form':
        render_to_string('workflow/includes/partial_random_column_add.html',
                         {'form': form},
                         request=request),
    })
Beispiel #30
0
def formula_column_add(
    request: HttpRequest,
    workflow: Optional[Workflow] = None,
) -> JsonResponse:
    """Add a column computed with a formula over existing columns (Modal).

    The form selects an operation (``op_type``) and a set of columns; the
    new column is computed by applying the operation's function (looked up
    in ``_op_distrib``) to the selected columns of the data frame.

    :param request: HTTP request with the form data

    :param workflow: Workflow being considered (injected by a decorator)

    :return: JSON response with either the form HTML or a redirect
    """
    # Get the workflow element
    if workflow.nrows == 0:
        messages.error(
            request,
            _('Cannot add column to a workflow without data'),
        )
        return JsonResponse({'html_redirect': ''})

    # Form to read/process data
    form = FormulaColumnAddForm(
        form_data=request.POST or None,
        operands=_formula_column_operands,
        columns=workflow.columns.all(),
    )

    if request.method == 'POST' and form.is_valid():
        # Save the column object attached to the form and add additional fields
        column = form.save(commit=False)
        column.workflow = workflow
        column.is_key = False

        # Save the instance
        try:
            column.save()
            form.save_m2m()
        except IntegrityError:
            form.add_error('name', _('A column with that name already exists'))
            return JsonResponse({
                'html_form':
                render_to_string(
                    'workflow/includes/partial_formula_column_add.html',
                    {'form': form},
                    request=request),
            })

        # Update the data frame
        df = load_table(workflow.get_data_frame_table_name())

        try:
            # Add the column with the appropriate computation
            operation = form.cleaned_data['op_type']
            cnames = [col.name for col in form.selected_columns]
            df[column.name] = _op_distrib[operation](df[cnames])
        except Exception as exc:
            # Something went wrong in pandas, we need to remove the column
            column.delete()

            # Notify in the form
            form.add_error(
                None,
                _('Unable to add the column: {0}').format(exc, ),
            )
            return JsonResponse({
                'html_form':
                render_to_string(
                    'workflow/includes/partial_formula_column_add.html',
                    {'form': form},
                    request=request,
                ),
            })

        # Populate the column type
        column.data_type = pandas_datatype_names.get(
            df[column.name].dtype.name)

        # Update the positions of the appropriate columns
        workflow.reposition_columns(workflow.ncols + 1, column.position)

        # Save column and refresh the prefetched related in the workflow
        column.save()
        workflow.refresh_from_db()

        # Store the df to DB
        try:
            store_dataframe(df, workflow)
        except Exception as exc:
            # Message fixed: the previous text ('Unable to clone column')
            # was copy-pasted from the clone view; this view adds a column.
            messages.error(
                request,
                _('Unable to add the column: {0}').format(str(exc)))
            return JsonResponse({'html_redirect': ''})

        # Log the event
        Log.objects.register(
            request.user, Log.COLUMN_ADD_FORMULA, workflow, {
                'id': workflow.id,
                'name': workflow.name,
                'column_name': column.name,
                'column_type': column.data_type
            })

        # The form has been successfully processed
        return JsonResponse({'html_redirect': ''})

    return JsonResponse({
        'html_form':
        render_to_string(
            'workflow/includes/partial_formula_column_add.html',
            {'form': form},
            request=request,
        ),
    })