Example #1
0
    def test_table_pandas_merge_to_inner(self):
        """Inner-merge ``self.src_df`` into the workflow through the API.

        The request joins both tables on ``sid``. The merge must be accepted
        without an error detail and leave the workflow with two rows.
        """
        # Get the only workflow in the fixture
        workflow = Workflow.objects.all()[0]

        # Build the data frame that travels (serialized) in the request
        r_df = pd.DataFrame(self.src_df)

        # Request the merge through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                'src_df': df_to_string(r_df),
                'how': 'inner',
                'left_on': 'sid',
                'right_on': 'sid',
            },
            format='json')

        # No anomaly should be detected
        self.assertIsNone(response.data.get('detail'))

        # Refresh the workflow by id instead of relying on queryset order
        workflow = Workflow.objects.get(id=workflow.id)

        # Result should have two rows
        self.assertEqual(workflow.nrows, 2)

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Example #2
0
    def test_table_pandas_merge_to_left(self):
        """Left-merge ``self.src_df`` into the workflow through the API.

        The request joins both tables on ``sid``. The merge must be accepted
        without an error detail, keep the three rows of the workflow, and
        bring the ``newcol`` value over for the row with ``sid == 1``.
        """
        # Get the only workflow in the fixture
        workflow = Workflow.objects.all()[0]

        # Build the data frame that travels (serialized) in the request
        r_df = pd.DataFrame(self.src_df)

        # Request the merge through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                'src_df': df_to_string(r_df),
                'how': 'left',
                'left_on': 'sid',
                'right_on': 'sid',
            },
            format='json')

        # No anomaly should be detected
        self.assertIsNone(response.data.get('detail'))

        # Refresh the workflow by id instead of relying on queryset order
        workflow = Workflow.objects.get(id=workflow.id)

        # Result should have three rows as the initial DF
        self.assertEqual(workflow.nrows, 3)

        # The row with sid == 1 must hold the first newcol value of the
        # source data
        df = load_table(workflow.get_data_frame_table_name())
        self.assertEqual(df[df['sid'] == 1]['newcol'].values[0],
                         self.src_df['newcol'][0])

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Example #3
0
    def test_table_pandas_update(self):
        """Upload ``self.new_table`` with a PUT and verify the stored table.

        The table loaded back from the database must match the uploaded
        data frame, and the workflow must be consistent with it.
        """
        # Get the only workflow in the fixture
        workflow = Workflow.objects.all()[0]

        # Build the data frame to upload, with datetime columns detected
        r_df = pd.DataFrame(self.new_table)
        r_df = detect_datetime_columns(r_df)

        # Upload a new table
        response = self.client.put(
            reverse('table:api_pops', kwargs={'wid': workflow.id}),
            {'data_frame': df_to_string(r_df)},
            format='json')

        # The upload must not be rejected with a client/server error
        self.assertLess(response.status_code, 400)

        # Refresh wflow (has been updated); one fetch serves every check below
        workflow = Workflow.objects.get(id=workflow.id)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())

        # Compare both elements
        self.compare_tables(r_df, df)

        # Check that the rest of the information is correct
        self.assertTrue(check_wf_df(workflow))
Example #4
0
    def test_table_pandas_create(self):
        """Create a second workflow and upload ``self.new_table`` with a POST.

        The table loaded back from the database must match the uploaded
        data frame, and the new workflow must be consistent with it.
        """
        # Create a second workflow
        create_response = self.client.post(
            reverse('workflow:api_workflows'),
            {
                'name': test.wflow_name + '2',
                'attributes': {'one': 'two'},
            },
            format='json')

        # Fetch the workflow that was just created
        workflow = Workflow.objects.get(id=create_response.data['id'])

        # Build the data frame to upload, with datetime columns detected
        r_df = pd.DataFrame(self.new_table)
        r_df = detect_datetime_columns(r_df)

        # Upload the table
        upload_response = self.client.post(
            reverse('table:api_pops', kwargs={'wid': workflow.id}),
            {'data_frame': df_to_string(r_df)},
            format='json')

        # The upload must not be rejected with a client/server error
        self.assertLess(upload_response.status_code, 400)

        # Refresh wflow (has been updated)
        workflow = Workflow.objects.get(id=workflow.id)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())

        # Compare both elements
        self.compare_tables(r_df, df)

        # Check that the rest of the information is correct
        self.assertTrue(check_wf_df(workflow))
Example #5
0
    def test_serializer_pandas(self):
        """Check that a data frame survives the string/serializer round trip."""
        # One column per kind of value: text, int, float, bool, date string
        columns = {
            'key': ['k1', 'k2'],
            't1': ['t1', 't2'],
            'i2': [5, 6],
            'f3': [7.0, 8.0],
            'b4': [True, False],
            'd5': [
                '2018-10-11 21:12:04+10:30',
                '2018-10-12 21:12:04+10:30',
            ],
        }
        df = pd.DataFrame(columns)

        # Feed the string form of the frame to the serializer
        serializer = DataFramePandasSerializer(
            data={'data_frame': df_to_string(df)},
            many=False)
        self.assertTrue(serializer.is_valid())

        # The validated payload must equal the original frame
        restored = serializer.validated_data['data_frame']
        self.assertTrue(df.equals(restored))
Example #6
0
    def test_table_pandas_merge_to_outer_NaN(self):
        """Outer-merge ``self.src_df2`` and compare with a local pandas merge.

        The expected table is computed with ``pd.merge`` on ``sid`` before
        the request. After the API merge, the workflow must have four rows
        and eight columns, and its stored table must match the expected one.
        """
        # Get the only workflow in the fixture
        workflow = Workflow.objects.all()[0]

        # Unmark age and email as key columns
        for column_name in ('age', 'email'):
            column = workflow.columns.filter(name=column_name)[0]
            column.is_key = False
            column.save()

        # Drop the column with booleans because the data type is lost
        workflow_delete_column(workflow,
                               workflow.columns.get(name='registered'))

        # Build the data frame that travels (serialized) in the request
        r_df = pd.DataFrame(self.src_df2)

        # Compute the expected result from the table currently in the db
        df = load_table(workflow.get_data_frame_table_name())
        new_df = pd.merge(df, r_df, how='outer', left_on='sid', right_on='sid')

        # Request the merge through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                'src_df': df_to_string(r_df),
                'how': 'outer',
                'left_on': 'sid',
                'right_on': 'sid',
            },
            format='json')

        # No anomaly should be detected
        self.assertIsNone(response.data.get('detail'))

        # Refresh the workflow by id instead of relying on queryset order
        workflow = Workflow.objects.get(id=workflow.id)

        # Result should have four rows and eight columns
        self.assertEqual(workflow.nrows, 4)
        self.assertEqual(workflow.ncols, 8)

        # Load the df from the db
        df = load_table(workflow.get_data_frame_table_name())

        # Compare the stored table with the expected one
        self.compare_tables(df, new_df)

        # Check for df/wf consistency
        self.assertTrue(check_wf_df(workflow))
Example #7
0
    def test_table_pandas_merge_to_outer(self):
        """Outer-merge ``self.src_df`` into the workflow through the API."""
        wflow = Workflow.objects.all()[0]

        # Unmark age and email as key columns
        for column_name in ('age', 'email'):
            column = wflow.columns.filter(name=column_name)[0]
            column.is_key = False
            column.save()

        # Data frame sent (serialized) in the merge request
        incoming = pd.DataFrame(self.src_df)

        url = reverse('table:api_pmerge', kwargs={'wid': wflow.id})
        payload = {
            "src_df": df_to_string(incoming),
            "how": "outer",
            "left_on": "sid",
            "right_on": "sid",
        }
        response = self.client.put(url, payload, format='json')

        # The response must not carry an error detail
        detail = response.data.get('detail')
        self.assertEqual(None, detail)

        # Read the workflow again after the merge
        wflow = Workflow.objects.all()[0]

        # The merged table is expected to hold four rows
        self.assertEqual(wflow.nrows, 4)

        # Workflow and stored data frame must agree
        is_consistent = check_wf_df(wflow)
        self.assertTrue(is_consistent)
Example #8
0
    def test_table_pandas_merge_to_empty(self):
        """An inner merge that yields no rows must be reported as an error."""
        target = Workflow.objects.all()[0]

        # Data frame sent (serialized) in the merge request
        incoming = pd.DataFrame(self.new_table)

        url = reverse('table:api_pmerge', kwargs={'wid': target.id})
        payload = {
            "src_df": df_to_string(incoming),
            "how": "inner",
            "left_on": "sid",
            "right_on": "sid",
        }
        response = self.client.put(url, payload, format='json')

        # The API must explain that the merge produced an empty result
        expected_detail = (
            'Unable to perform merge operation: '
            'Merge operation produced a result with no rows')
        self.assertEqual(response.data['detail'], expected_detail)

        # Workflow and stored data frame must still agree
        target = Workflow.objects.all()[0]
        self.assertTrue(check_wf_df(target))