Code example #1
File: stats.py Project: ritotombe/ontask_b
def _get_df_and_columns(
    workflow: Workflow,
    pk: int,
) -> Optional[Tuple[DataFrame, List, View]]:
    """Get the DF and the columns to process.

    :param workflow: Workflow object
    :param pk: Optional id for a view
    :return: Tuple (data frame, list of columns, view); None if the view is not found
    """
    # If a view is given, filter the columns
    view = None
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found. Redirect to workflow detail
            return None
        columns_to_view = view.columns.filter(is_key=False)

        df = load_table(workflow.get_data_frame_table_name(),
                        [col.name for col in columns_to_view], view.formula)
    else:
        # No view given, fetch the entire data frame
        columns_to_view = workflow.columns.filter(is_key=False)
        df = load_table(workflow.get_data_frame_table_name(),
                        [col.name for col in columns_to_view])

    return df, columns_to_view, view
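A minimal caller sketch (hypothetical names such as view_pk, not code from the project) showing how the return value might be handled, since the helper returns a single None on error rather than a tuple:

result = _get_df_and_columns(workflow, view_pk)
if result is None:
    # View not found: redirect or abort in the real view code
    ...
else:
    df, columns, view = result  # data frame, non-key columns, optional view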
Code example #2
File: test_api.py Project: ritotombe/ontask_b
    def test_table_pandas_merge_to_outer_NaN(self):
        # Get the only workflow in the fixture
        workflow = Workflow.objects.all()[0]

        age = workflow.columns.filter(name='age')[0]
        age.is_key = False
        age.save()

        email = workflow.columns.filter(name='email')[0]
        email.is_key = False
        email.save()

        # Drop the column with booleans because the data type is lost
        workflow_delete_column(
            workflow,
            workflow.columns.get(name='registered')
        )

        # Create the source data frame (serialized to a string in the API call below)
        r_df = pd.DataFrame(self.src_df2)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())
        new_df = pd.merge(df, r_df, how="outer", left_on="sid", right_on="sid")

        # Merge the data through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": df_to_string(r_df),
                "how": "outer",
                "left_on": "sid",
                "right_on": "sid"
            },
            format='json')

        # Get the new workflow
        workflow = Workflow.objects.all()[0]

        # After the outer merge the result should have four rows and eight columns
        self.assertEqual(workflow.nrows, 4)
        self.assertEqual(workflow.ncols, 8)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())

        # Compare both elements and check wf df consistency
        self.compare_tables(df, new_df)

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Code example #3
File: test_api.py Project: ritotombe/ontask_b
    def test_table_pandas_update(self):
        # Get the only workflow in the fixture
        workflow = Workflow.objects.all()[0]

        # Create the new data frame (serialized to a string in the API call below)
        r_df = pd.DataFrame(self.new_table)
        r_df = detect_datetime_columns(r_df)

        # Upload a new table
        response = self.client.put(
            reverse(
                'table:api_pops',
                kwargs={'wid': workflow.id}),
            {'data_frame': df_to_string(r_df)},
            format='json')

        # Refresh wflow (has been updated)
        workflow = Workflow.objects.get(id=workflow.id)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())

        # Compare both elements
        self.compare_tables(r_df, df)

        # Check that the rest of the information is correct
        workflow = Workflow.objects.get(id=workflow.id)
        self.assertTrue(check_wf_df(workflow))
Code example #4
File: test_api.py Project: ritotombe/ontask_b
    def test_table_json_create(self):
        # Create a second workflow
        response = self.client.post(
            reverse('workflow:api_workflows'),
            {'name': test.wflow_name + '2', 'attributes': {'one': 'two'}},
            format='json')

        # Get the newly created workflow
        workflow = Workflow.objects.get(id=response.data['id'])

        # Upload the table
        response = self.client.post(
            reverse('table:api_ops', kwargs={'wid': workflow.id}),
            {'data_frame': self.new_table},
            format='json')

        # Refresh wflow (has been updated)
        workflow = Workflow.objects.get(id=workflow.id)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())
        # Transform new table into data frame
        r_df = pd.DataFrame(self.new_table)
        r_df = detect_datetime_columns(r_df)

        # Compare both elements
        self.compare_tables(r_df, df)

        # Check that the rest of the information is correct
        self.assertTrue(check_wf_df(workflow))
Code example #5
    def test_table_JSON_merge_to_left(self):
        # Get the only workflow in the fixture
        workflow = models.Workflow.objects.all()[0]

        age = workflow.columns.filter(name='age')[0]
        age.is_key = False
        age.save()

        email = workflow.columns.filter(name='email')[0]
        email.is_key = False
        email.save()

        # Merge the data through the API
        self.client.put(reverse('table:api_merge',
                                kwargs={'wid': workflow.id}), {
                                    "src_df": self.src_df,
                                    "how": "left",
                                    "left_on": "sid",
                                    "right_on": "sid"
                                },
                        format='json')

        # Get the new workflow
        workflow = models.Workflow.objects.all()[0]

        # Result should have three rows, as in the initial DF
        self.assertEqual(workflow.nrows, 3)

        dframe = pandas.load_table(workflow.get_data_frame_table_name())
        self.assertEqual(dframe[dframe['sid'] == 1]['newcol'].values[0],
                         self.src_df['newcol'][0])
Code example #6
    def test_table_pandas_create(self):
        # Create a second workflow
        response = self.client.post(reverse('workflow:api_workflows'), {
            'name': tests.wflow_name + '2',
            'attributes': {
                'one': 'two'
            }
        },
                                    format='json')

        # Get the newly created workflow
        workflow = models.Workflow.objects.get(id=response.data['id'])

        # Transform new table into a data frame
        r_df = pd.DataFrame(self.new_table)
        r_df = pandas.detect_datetime_columns(r_df)

        # Upload the table
        self.client.post(reverse('table:api_pops', kwargs={'wid':
                                                           workflow.id}),
                         {'data_frame': serializers.df_to_string(r_df)},
                         format='json')

        # Refresh wflow (has been updated)
        workflow = models.Workflow.objects.get(id=workflow.id)

        # Load the df from the db
        dframe = pandas.load_table(workflow.get_data_frame_table_name())

        # Compare both elements
        self.compare_tables(r_df, dframe)
Code example #7
File: ops.py Project: ritotombe/ontask_b
def workflow_restrict_column(column: Column) -> Optional[str]:
    """Set category of the column to the existing set of values.

    Given a workflow and a column, modifies the column so that only the
    values already present are allowed for future updates.

    :param column: Column object to restrict

    :return: String with error or None if correct
    """
    # Load the data frame
    data_frame = load_table(column.workflow.get_data_frame_table_name())

    cat_values = set(data_frame[column.name].dropna())
    if not cat_values:
        # Column has no meaningful values. Nothing to do.
        return _('Column has no meaningful values')

    # Set categories
    column.set_categories(list(cat_values))
    column.save()

    # Re-evaluate the operands in the workflow
    column.workflow.set_query_builder_ops()
    column.workflow.save()

    # Correct execution
    return None
Code example #8
def restrict_column(user, column: models.Column):
    """Set category of the column to the existing set of values.

    Given a workflow and a column, modifies the column so that only the
    values already present are allowed for future updates.

    :param user: User executing this action
    :param column: Column object to restrict
    :return: Nothing; raises OnTaskColumnCategoryValueError if the column has no
     meaningful values
    """
    # Load the data frame
    data_frame = pandas.load_table(column.workflow.get_data_frame_table_name())

    cat_values = set(data_frame[column.name].dropna())
    if not cat_values:
        raise errors.OnTaskColumnCategoryValueError(
            message=_('The column has no meaningful values'))

    # Set categories
    column.set_categories(list(cat_values))

    # Re-evaluate the operands in the workflow
    column.log(user, models.Log.COLUMN_RESTRICT)
    column.workflow.set_query_builder_ops()
    column.workflow.save()
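A hedged sketch of how this exception-based variant might be called; errors.OnTaskColumnCategoryValueError comes from the code above, but the surrounding view logic here is illustrative only:

try:
    restrict_column(request.user, column)
except errors.OnTaskColumnCategoryValueError as exc:
    # The column had no meaningful values; report the message to the user
    messages.error(request, str(exc))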
Code example #9
File: stats.py Project: ritotombe/ontask_b
def _get_column_visualization_items(
    workflow: Workflow,
    column: Column,
):
    """Get the visualization items (scripts and HTML) for a column.

    :param workflow: Workflow being processed
    :param column: Column requested
    :return: Tuple (stat_data with descriptive stats, visualization scripts,
     visualization HTML)
    """
    # Get the dataframe
    df = load_table(workflow.get_data_frame_table_name(), [column.name])

    # Extract the data to show at the top of the page
    stat_data = get_column_statistics(df[column.name])

    vis_scripts = []
    visualizations = _get_column_visualisations(
        column,
        df[[column.name]],
        vis_scripts,
        context={
            'style': 'width:100%; height:100%;display:inline-block;'
        },
    )

    return stat_data, vis_scripts, visualizations
Code example #10
    def test_table_pandas_merge_to_left(self):
        # Get the only workflow in the fixture
        workflow = models.Workflow.objects.all()[0]

        # Create the source data frame (serialized to a string in the API call below)
        r_df = pd.DataFrame(self.src_df)

        # Merge the data through the API
        self.client.put(reverse('table:api_pmerge',
                                kwargs={'wid': workflow.id}), {
                                    "src_df": serializers.df_to_string(r_df),
                                    "how": "left",
                                    "left_on": "sid",
                                    "right_on": "sid"
                                },
                        format='json')

        # Get the new workflow
        workflow = models.Workflow.objects.all()[0]

        # Result should have three rows, as in the initial DF
        self.assertEqual(workflow.nrows, 3)

        dframe = pandas.load_table(workflow.get_data_frame_table_name())
        self.assertEqual(dframe[dframe['sid'] == 1]['newcol'].values[0],
                         self.src_df['newcol'][0])
Code example #11
File: test_logic.py Project: ritotombe/ontask_b
    def test_action_1_preview(self):
        """Test that first action renders correctly."""
        self.workflow = Workflow.objects.all().first()
        self.user = get_user_model().objects.filter(
            email='*****@*****.**').first()
        attribute_value = list(self.workflow.attributes.values())[0]
        df = load_table(self.workflow.get_data_frame_table_name())

        for action_name in [self.action_name1, self.action_name2]:
            action = self.workflow.actions.get(name=action_name)
            for index, row in df.iterrows():
                condition_value = row['!#$%&()*+,-./\:;<=>?@[]^_`{|}~ 1'] < 12.5
                # JSON request to obtain preview
                resp = self.get_response('action:preview',
                                         url_params={
                                             'pk': action.id,
                                             'idx': index + 1
                                         },
                                         is_ajax=True)
                self.assertTrue(status.is_success(resp.status_code))
                self.assertTrue(attribute_value in str(resp.content))
                self.assertEqual('Condition 1' in str(resp.content),
                                 condition_value)
                self.assertEqual('Condition 2' in str(resp.content),
                                 condition_value)
                self.assertEqual('Condition 3' in str(resp.content),
                                 condition_value)
                self.assertEqual('Condition 4' in str(resp.content),
                                 condition_value)
Code example #12
    def test_02_second_plugin(self):
        # Login
        self.login('*****@*****.**')

        # GO TO THE WORKFLOW PAGE
        self.access_workflow_from_home_page('Plugin test')

        # Open the transform page
        self.go_to_transform()

        # Click in the second plugin
        element = self.search_table_row_by_string('transform-table', 1,
                                                  'Test Plugin 2 Name')
        element.find_element_by_link_text('Test Plugin 2 Name').click()
        WebDriverWait(self.selenium, 10).until(
            EC.presence_of_element_located((By.NAME, 'csrfmiddlewaretoken')))
        # Spinner not visible
        WebDriverWait(self.selenium, 10).until_not(
            EC.visibility_of_element_located((By.ID, 'div-spinner')))

        # Provide the execution data (input columns and merge key)
        self.selenium.find_element_by_id(
            "id____ontask___upload_input_0").click()
        Select(
            self.selenium.find_element_by_id(
                "id____ontask___upload_input_0")).select_by_visible_text('A1')
        self.selenium.find_element_by_id(
            "id____ontask___upload_input_1").click()
        Select(
            self.selenium.find_element_by_id(
                "id____ontask___upload_input_1")).select_by_visible_text('A2')
        # merge key
        self.select_plugin_output_tab()
        self.selenium.find_element_by_id("id_merge_key").click()
        Select(self.selenium.find_element_by_id(
            "id_merge_key")).select_by_visible_text("email")
        # Submit the execution
        self.selenium.find_element_by_name("Submit").click()

        WebDriverWait(self.selenium, 10).until(
            EC.text_to_be_present_in_element((By.XPATH, "//body/div/h1"),
                                             'Plugin scheduled for execution'))

        # There should be a message on that page
        self.assertTrue(
            self.selenium.find_element_by_xpath(
                "//div[@id='plugin-run-done']/div/a").text.startswith(
                    'You may check the status in log number'))

        # Assert the content of the dataframe
        wflow = models.Workflow.objects.get(name='Plugin test')
        df = pandas.load_table(wflow.get_data_frame_table_name())
        self.assertTrue('RESULT 3' in set(df.columns))
        self.assertTrue('RESULT 4' in set(df.columns))
        self.assertTrue(df['RESULT 3'].equals(df['A1'] + df['A2']))
        self.assertTrue(df['RESULT 4'].equals(df['A1'] - df['A2']))

        # End of session
        self.logout()
Code example #13
File: api.py Project: ritotombe/ontask_b
    def put(
        self,
        request: HttpRequest,
        wid: int,
        format=None,
        workflow: Optional[Workflow] = None,
    ) -> HttpResponse:
        """Process the put request."""
        # Get the dst_df
        dst_df = load_table(workflow.get_data_frame_table_name())

        serializer = self.serializer_class(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        # Check that the parameters are correct
        how = serializer.validated_data['how']
        if how == '' or how not in ['left', 'right', 'outer', 'inner']:
            raise APIException(
                _('how must be one of left, right, outer or inner'))

        left_on = serializer.validated_data['left_on']
        if not is_unique_column(dst_df[left_on]):
            raise APIException(
                _('column {0} does not contain a unique key.').format(left_on))

        # Operation has been accepted by the serializer
        src_df = serializer.validated_data['src_df']

        right_on = serializer.validated_data['right_on']
        if right_on not in list(src_df.columns):
            raise APIException(
                _('column {0} not found in data frame').format(right_on))

        if not is_unique_column(src_df[right_on]):
            raise APIException(
                _('column {0} does not contain a unique key.').format(
                    right_on))

        merge_info = {
            'how_merge': how,
            'dst_selected_key': left_on,
            'src_selected_key': right_on,
            'initial_column_names': list(src_df.columns),
            'rename_column_names': list(src_df.columns),
            'columns_to_upload': [True] * len(list(src_df.columns)),
        }

        # Ready to perform the MERGE
        try:
            perform_dataframe_upload_merge(workflow, dst_df, src_df,
                                           merge_info)
        except Exception as exc:
            raise APIException(
                _('Unable to perform merge operation: {0}').format(str(exc)))

        # Merge went through.
        return Response(serializer.data, status=status.HTTP_201_CREATED)
Code example #14
File: upload_steps.py Project: ubc/ontask_b
def upload_step_four(
    request: http.HttpRequest,
    workflow: models.Workflow,
    upload_data: Dict,
) -> http.HttpResponse:
    """Perform the merge operation.

    :param request: Received request
    :param workflow: Workflow being processed
    :param upload_data: Dictionary with all the information about the merge.
    :return: HttpResponse
    """
    # Get the dataframes to merge
    try:
        dst_df = pandas.load_table(workflow.get_data_frame_table_name())
        src_df = pandas.load_table(workflow.get_upload_table_name())
    except Exception:
        return render(request, 'error.html',
                      {'message': _('Exception while loading data frame')})

    try:
        pandas.perform_dataframe_upload_merge(workflow, dst_df, src_df,
                                              upload_data)
    except Exception as exc:
        # Nuke the temporary table
        sql.delete_table(workflow.get_upload_table_name())
        col_info = workflow.get_column_info()
        workflow.log(request.user,
                     models.Log.WORKFLOW_DATA_FAILEDMERGE,
                     column_names=col_info[0],
                     column_types=col_info[1],
                     column_unique=col_info[2],
                     error_message=str(exc))
        messages.error(request, _('Merge operation failed. ') + str(exc))
        return redirect(reverse('table:display'))

    col_info = workflow.get_column_info()
    workflow.log(request.user,
                 upload_data['log_upload'],
                 column_names=col_info[0],
                 column_types=col_info[1],
                 column_unique=col_info[2])
    store_workflow_in_session(request.session, workflow)
    request.session.pop('upload_data', None)

    return redirect(reverse('table:display'))
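The contents of upload_data are not shown in this example. Judging from the dictionaries passed to perform_dataframe_upload_merge in examples #13, #17 and #30, it plausibly carries at least the keys below; treat this as an inferred sketch with placeholder values, not the project's definitive schema:

upload_data = {
    'how_merge': 'outer',                 # one of 'left', 'right', 'outer', 'inner'
    'dst_selected_key': 'sid',            # key column in the existing data frame
    'src_selected_key': 'sid',            # key column in the uploaded data frame
    'initial_column_names': ['sid', 'newcol'],
    'rename_column_names': ['sid', 'newcol'],
    'columns_to_upload': [True, True],
    'log_upload': 'WORKFLOW_DATA_MERGE',  # placeholder for the models.Log event code read above
}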
Code example #15
    def test_merge_inner(self):

        # Get the workflow
        self.workflow = models.Workflow.objects.all()[0]

        # Parse the source data frame
        df_dst, df_src = self.parse_data_frames()

        self.merge_info['how_merge'] = 'inner'

        result = pandas.perform_dataframe_upload_merge(self.workflow, df_dst,
                                                       df_src, self.merge_info)

        # Load again the workflow data frame
        pandas.load_table(self.workflow.get_data_frame_table_name())

        # Result must be correct (None)
        self.assertEqual(result, None)
Code example #16
def column_restrict_values(
    request: HttpRequest,
    pk: int,
    workflow: Optional[Workflow] = None,
    column: Optional[Column] = None,
) -> JsonResponse:
    """Restrict future values in this column to one of those already present.

    :param request: HTTP request

    :param pk: ID of the column to restrict. The workflow element is taken
     from the session.

    :return: Render the delete column form
    """
    # Key columns cannot be restricted
    if column.is_key:
        messages.error(request, _('You cannot restrict a key column'))
        return JsonResponse({'html_redirect': reverse('workflow:detail')})

    # Get the name of the column to restrict
    context = {'pk': pk, 'cname': column.name}

    # Get the values from the data frame
    df = load_table(workflow.get_data_frame_table_name())
    context['values'] = ', '.join(set(df[column.name]))

    if request.method == 'POST':
        # Proceed restricting the column
        error_txt = workflow_restrict_column(column)

        if isinstance(error_txt, str):
            # Something went wrong. Show it
            messages.error(request, error_txt)

            # Log the event
            Log.objects.register(
                request.user,
                Log.COLUMN_RESTRICT,
                workflow,
                {
                    'id': workflow.id,
                    'name': workflow.name,
                    'column_name': column.name,
                    'values': context['values']})

        return JsonResponse({'html_redirect': reverse('workflow:detail')})

    return JsonResponse({
        'html_form': render_to_string(
            'workflow/includes/partial_column_restrict.html',
            context,
            request=request),
    })
Code example #17
File: test_logic.py Project: ritotombe/ontask_b
    def parse_data_frames(self):
        # Parse the two CSV strings and return as data frames

        if self.workflow:
            # Get the workflow data frame
            df_dst = load_table(self.workflow.get_data_frame_table_name())
        else:
            df_dst = load_df_from_csvfile(io.StringIO(self.csv1), 0, 0)

        df_src = load_df_from_csvfile(io.StringIO(self.csv2), 0, 0)
        store_table(df_src, 'TEMPORARY_TABLE')
        df_src = load_table('TEMPORARY_TABLE')
        # Fix the merge_info fields.

        self.merge_info['initial_column_names'] = list(df_src.columns)
        self.merge_info['rename_column_names'] = list(df_src.columns)
        self.merge_info['columns_to_upload'] = list(df_src.columns)

        return df_dst, df_src
Code example #18
File: checks.py Project: cliffnyendwe/ontask_b
def check_wf_df(workflow: models.Workflow) -> bool:
    """Check consistency between Workflow info and the data frame.

    Check the consistency between the information stored in the workflow
    and the structure of the underlying dataframe

    :param workflow: Workflow object
    :return: Boolean stating the result of the check. True: Correct.
    """
    # Get the df
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Set values in case there is no df
    if df is not None:
        dfnrows = df.shape[0]
        dfncols = df.shape[1]
        df_col_names = list(df.columns)
    else:
        dfnrows = 0
        dfncols = 0
        df_col_names = []

    # Check 1: Number of rows and columns
    assert workflow.nrows == dfnrows, 'Inconsistent number of rows'
    assert workflow.ncols == dfncols, 'Inconsistent number of columns'

    # Identical sets of columns
    wf_cols = workflow.columns.all()
    assert set(df_col_names) == {col.name for col in wf_cols}, (
        'Inconsistent set of columns')

    # Identical data types
    # for n1, n2 in zip(wf_cols, df_col_names):
    for col in wf_cols:
        df_dt = pandas.datatype_names.get(df[col.name].dtype.name)
        if col.data_type == 'boolean' and df_dt == 'string':
            # This is the case of a column with Boolean and Nulls
            continue

        assert col.data_type == df_dt, ('Inconsistent data type {0}'.format(
            col.name))

    # Verify that the columns marked as unique are preserved
    for col in workflow.columns.filter(is_key=True):
        assert pandas.is_unique_column(
            df[col.name]), ('Column {0} should be unique.'.format(col.name))

    # Columns are properly numbered
    cpos = workflow.columns.values_list('position', flat=True)
    rng = range(1, len(cpos) + 1)
    assert sorted(cpos) == list(rng)

    return True
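Although annotated as returning bool, check_wf_df never returns False: any inconsistency raises AssertionError. A small, illustrative sketch of using it outside a test case:

try:
    check_wf_df(workflow)
except AssertionError as exc:
    # One of the consistency checks above failed
    print('Workflow/data frame mismatch:', exc)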
Code example #19
File: api.py Project: ritotombe/ontask_b
    def get(
        self,
        request: HttpRequest,
        wid: int,
        format=None,
        workflow: Optional[Workflow] = None,
    ) -> HttpResponse:
        """Retrieve the existing data frame."""
        serializer = self.serializer_class(
            {'data_frame': load_table(workflow.get_data_frame_table_name())})
        return Response(serializer.data)
Code example #20
File: test_logic.py Project: cliffnyendwe/ontask_b
    def test_create_random_column_string_form(self):
        """Create a random string column"""
        # Get the workflow first
        self.workflow = models.Workflow.objects.all().first()

        # Column name and current number of them
        cname = 'random_column'
        ncols = self.workflow.ncols

        # JSON POST request with a single category value; the column should not be created
        resp = self.get_response('workflow:random_column_add',
                                 method='POST',
                                 req_params={
                                     'name': cname,
                                     'data_type': 'string',
                                     'raw_categories': 'bogus',
                                     'position': 0
                                 },
                                 is_ajax=True)

        resp_content = json.loads(resp.content)
        self.assertTrue('html_form' in resp_content)
        self.assertTrue(status.is_success(resp.status_code))
        self.workflow.refresh_from_db()
        self.assertEqual(self.workflow.ncols, ncols)
        new_column = self.workflow.columns.filter(name=cname).first()
        self.assertIsNone(new_column)

        # JSON POST request for column creation with a string list
        resp = self.get_response('workflow:random_column_add',
                                 method='POST',
                                 req_params={
                                     'name': cname,
                                     'data_type': 'string',
                                     'raw_categories': 'one, two, three',
                                     'position': 0
                                 },
                                 is_ajax=True)

        resp_content = json.loads(resp.content)
        self.assertTrue('html_form' not in resp_content)
        self.assertTrue(status.is_success(resp.status_code))
        self.workflow.refresh_from_db()
        self.assertEqual(self.workflow.ncols, ncols + 1)
        new_column = self.workflow.columns.filter(name=cname).first()
        self.assertIsNotNone(new_column)
        self.assertEqual(new_column.name, cname)
        self.assertEqual(new_column.data_type, 'string')
        data_frame = pandas.load_table(
            self.workflow.get_data_frame_table_name())
        self.assertTrue(
            all(element in ['one', 'two', 'three']
                for element in data_frame[cname]))
Code example #21
File: test_logic.py Project: ritotombe/ontask_b
    def do_sql_txt_operand(self,
                           input_value,
                           op_value,
                           type_value,
                           value,
                           row_yes=1,
                           row_no=1):

        self.set_skel(input_value, op_value.format(''), type_value, value,
                      'v_' + type_value)
        data_frame = load_table(self.test_table, self.test_columns, self.skel)
        self.assertEqual(data_frame.shape[0], row_yes)
        evaluate_formula(self.skel, EVAL_TXT)

        if op_value.find('{0}') != -1:
            self.set_skel(input_value, op_value.format('not_'), type_value,
                          value, 'v_' + type_value)
            data_frame = load_table(self.test_table, self.test_columns,
                                    self.skel)
            self.assertEqual(data_frame.shape[0], row_no)
            evaluate_formula(self.skel, EVAL_TXT)
Code example #22
def add_formula_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    operation: str,
    selected_columns: List[models.Column],
):
    """Add the formula column to the workflow.

    :param user: User making the request
    :param workflow: Workflow to add the column
    :param column: Column being added
    :param operation: string denoting the operation
    :param selected_columns: List of columns selected for the operation.
    :return: Nothing. The column is added to the workflow as a side effect.
    """
    # Save the column object attached to the form and add additional fields
    column.workflow = workflow
    column.is_key = False

    # Save the instance
    column.save()

    # Update the data frame
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Add the column with the appropriate computation
    cnames = [col.name for col in selected_columns]
    df[column.name] = _op_distrib[operation](df[cnames])

    # Populate the column type
    column.data_type = pandas.datatype_names.get(
        df[column.name].dtype.name)

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    column.save()
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(df, workflow)
    except Exception as exc:
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add column: {0}').format(str(exc)),
            to_delete=[column])

    workflow.ncols = workflow.columns.count()
    workflow.save()
    column.log(user, models.Log.COLUMN_ADD_FORMULA)
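_op_distrib is not included in this snippet; it is presumably a mapping from operation names to row-wise pandas computations over the selected columns. A minimal sketch of what such a mapping could look like (the operation names are assumptions):

# Hypothetical shape of _op_distrib: each entry maps the data frame restricted to
# the selected columns to a single Series holding the new column's values.
_op_distrib = {
    'sum': lambda df: df.sum(axis=1),
    'prod': lambda df: df.prod(axis=1),
    'max': lambda df: df.max(axis=1),
    'min': lambda df: df.min(axis=1),
    'mean': lambda df: df.mean(axis=1),
}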
Code example #23
    def test_column_clone(self):
        """Test adding a random column."""
        column = self.workflow.columns.get(name='Q01')
        resp = self.get_response('workflow:column_clone', {'pk': column.id},
                                 is_ajax=True)
        self.assertTrue(status.is_success(resp.status_code))

        # Clone the column with a POST request
        resp = self.get_response('workflow:column_clone', {'pk': column.id},
                                 method='POST',
                                 is_ajax=True)
        self.assertTrue(status.is_success(resp.status_code))

        df = pandas.load_table(self.workflow.get_data_frame_table_name())
        self.assertTrue(df['Copy of Q01'].equals(df['Q01']))
Code example #24
File: api.py Project: ubc/ontask_b
    def post(
        self,
        request: http.HttpRequest,
        wid: int,
        format=None,
        workflow: Optional[models.Workflow] = None,
    ) -> http.HttpResponse:
        """Create a new data frame."""
        if pandas.load_table(workflow.get_data_frame_table_name()) is not None:
            raise APIException(
                _('Post request requires workflow without a table'))
        return self.override(request,
                             wid=wid,
                             format=format,
                             workflow=workflow)
Code example #25
    def test_04_merge_right(self):
        self.template_merge('right', rename=False)

        # Assert the content of the dataframe
        wflow = models.Workflow.objects.get(name=self.wf_name)
        df = pandas.load_table(wflow.get_data_frame_table_name())

        self.assertTrue('key' in set(df.columns))
        self.assertTrue('text3' in set(df.columns))
        self.assertTrue('double3' in set(df.columns))
        self.assertTrue('bool3' in set(df.columns))
        self.assertTrue('date3' in set(df.columns))

        # End of session
        self.logout()
Code example #26
    def test_table_pandas_get(self):
        # Get the only workflow in the fixture
        workflow = models.Workflow.objects.all()[0]

        # Get the data through the API
        response = self.client.get(
            reverse('table:api_pops', kwargs={'wid': workflow.id}))

        # Transform the response into a data frame
        r_df = serializers.string_to_df(response.data['data_frame'])

        # Load the df from the db
        dframe = pandas.load_table(workflow.get_data_frame_table_name())

        # Compare both elements
        self.compare_tables(r_df, dframe)
Code example #27
    def test_table_JSON_get(self):
        # Get the only workflow in the fixture
        workflow = models.Workflow.objects.all()[0]

        # Get the data through the API
        response = self.client.get(
            reverse('table:api_ops', kwargs={'wid': workflow.id}))

        # Transform the response into a data frame
        r_df = pd.DataFrame(response.data['data_frame'])
        r_df = pandas.detect_datetime_columns(r_df)

        # Load the df from the db
        dframe = pandas.load_table(workflow.get_data_frame_table_name())

        # Compare both elements
        self.compare_tables(r_df, dframe)
Code example #28
File: test_views.py Project: ritotombe/ontask_b
    def test_03_merge_left(self):
        self.template_merge('left')

        # Assert the content of the dataframe
        wflow = Workflow.objects.get(name=self.wf_name)
        df = load_table(wflow.get_data_frame_table_name())

        self.assertTrue('key' in set(df.columns))
        self.assertTrue('key2' in set(df.columns))
        self.assertTrue('text3' in set(df.columns))
        self.assertTrue('double3' in set(df.columns))
        self.assertTrue('bool3' in set(df.columns))
        self.assertTrue('date3' in set(df.columns))

        assert check_wf_df(Workflow.objects.get(name='Testing Merge'))

        # End of session
        self.logout()
Code example #29
    def test_table_pandas_merge_get(self):
        # Get the only workflow in the fixture
        workflow = models.Workflow.objects.all()[0]

        # Get the data through the API
        response = self.client.get(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}))

        workflow = models.Workflow.objects.all()[0]

        # Deserialize the source data frame string returned by the API
        r_df = serializers.string_to_df(response.data['src_df'])

        # Load the df from the db
        dframe = pandas.load_table(workflow.get_data_frame_table_name())

        # Compare both elements and check wf df consistency
        self.compare_tables(r_df, dframe)
Code example #30
File: api.py Project: ubc/ontask_b
    def put(
        self,
        request: http.HttpRequest,
        wid: int,
        format=None,
        workflow: Optional[models.Workflow] = None,
    ) -> http.HttpResponse:
        """Process the put request."""
        del wid, format
        # Get the dst_df
        dst_df = pandas.load_table(workflow.get_data_frame_table_name())

        serializer = self.serializer_class(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        src_df = serializer.validated_data['src_df']
        how = serializer.validated_data['how']
        left_on = serializer.validated_data['left_on']
        right_on = serializer.validated_data['right_on']
        error = pandas.validate_merge_parameters(dst_df, src_df, how, left_on,
                                                 right_on)

        if error:
            raise APIException(error)

        # Ready to perform the MERGE
        try:
            pandas.perform_dataframe_upload_merge(
                workflow, dst_df, src_df, {
                    'how_merge': how,
                    'dst_selected_key': left_on,
                    'src_selected_key': right_on,
                    'initial_column_names': list(src_df.columns),
                    'rename_column_names': list(src_df.columns),
                    'columns_to_upload': [True] * len(list(src_df.columns))
                })
        except Exception as exc:
            raise APIException(
                _('Unable to perform merge operation: {0}').format(str(exc)))

        # Merge went through.
        return Response(serializer.data, status=status.HTTP_201_CREATED)
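Taken together, the examples above call load_table with one to three positional arguments: the table name, an optional list of column names, and an optional filter formula. A condensed, illustrative summary of those call shapes (variable names are descriptive only, not part of the project):

# Full table
df = load_table(workflow.get_data_frame_table_name())
# Subset of columns
df = load_table(workflow.get_data_frame_table_name(), ['age', 'email'])
# Subset of columns filtered by a view's formula
df = load_table(workflow.get_data_frame_table_name(), column_names, view.formula)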