Ejemplo n.º 1
0
    def test_table_pandas_merge_to_left(self):
        """Left-merge ``self.src_df`` into the workflow table through the API.

        Sends the data frame to the ``table:api_pmerge`` endpoint with
        ``how="left"`` joining on ``sid``, then checks that the response
        carries no error detail, that the workflow reports three rows, and
        that the stored ``newcol`` value for ``sid == 1`` equals the first
        ``newcol`` value of the source data.
        """
        # Use the first workflow in the DB (the fixture one)
        workflow = models.Workflow.objects.all()[0]

        # Build the data frame that will be merged
        r_df = pd.DataFrame(self.src_df)

        # Send the merge request through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": serializers.df_to_string(r_df),
                "how": "left",
                "left_on": "sid",
                "right_on": "sid",
            },
            format='json')

        # No anomaly should be detected. Without this check a failed merge
        # would only surface later as an opaque lookup error on 'newcol'.
        self.assertIsNone(response.data.get('detail'))

        # Re-read the workflow to pick up the changes made by the request
        workflow = models.Workflow.objects.all()[0]

        # Result should have three rows as the initial DF
        self.assertEqual(workflow.nrows, 3)

        # The merged column must carry the value supplied for sid == 1
        dframe = pandas.load_table(workflow.get_data_frame_table_name())
        self.assertEqual(dframe[dframe['sid'] == 1]['newcol'].values[0],
                         self.src_df['newcol'][0])
Ejemplo n.º 2
0
    def test_table_pandas_merge_to_outer(self):
        """Outer-merge ``self.src_df`` into the workflow table through the API.

        The ``age`` and ``email`` columns are first marked as non-key, then
        the data frame is sent to ``table:api_pmerge`` with ``how="outer"``
        joining on ``sid``. The response must carry no error detail and the
        workflow must report four rows afterwards.
        """
        # Use the first workflow in the DB (the fixture one)
        workflow = models.Workflow.objects.all()[0]

        # Unset the key flag on these columns before merging.
        # NOTE(review): presumably the outer merge adds a row with no values
        # for them, which would break the key property -- confirm.
        for column_name in ('age', 'email'):
            column = workflow.columns.filter(name=column_name)[0]
            column.is_key = False
            column.save()

        # Build the data frame that will be merged
        r_df = pd.DataFrame(self.src_df)

        # Send the merge request through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": serializers.df_to_string(r_df),
                "how": "outer",
                "left_on": "sid",
                "right_on": "sid",
            },
            format='json')

        # No anomaly should be detected
        self.assertIsNone(response.data.get('detail'))

        # Re-read the workflow to pick up the changes made by the request
        workflow = models.Workflow.objects.all()[0]

        # The outer merge must leave four rows in the workflow table
        self.assertEqual(workflow.nrows, 4)
Ejemplo n.º 3
0
    def test_table_pandas_create(self):
        """Create a workflow via the API, upload a table and read it back.

        A second workflow is created through ``workflow:api_workflows``,
        ``self.new_table`` is uploaded to it through ``table:api_pops`` and
        the table stored for the workflow is compared with the uploaded one.
        """
        # Create a second workflow through the API
        creation_url = reverse('workflow:api_workflows')
        creation_payload = {
            'name': tests.wflow_name + '2',
            'attributes': {'one': 'two'},
        }
        response = self.client.post(
            creation_url,
            creation_payload,
            format='json')

        # Fetch the workflow that has just been created
        created = models.Workflow.objects.get(id=response.data['id'])

        # Turn the new table into a data frame with date/time detection
        uploaded_df = pandas.detect_datetime_columns(
            pd.DataFrame(self.new_table))

        # Upload the table
        upload_url = reverse('table:api_pops', kwargs={'wid': created.id})
        upload_payload = {'data_frame': serializers.df_to_string(uploaded_df)}
        self.client.post(upload_url, upload_payload, format='json')

        # Refresh the workflow (it has been updated by the upload)
        created = models.Workflow.objects.get(id=created.id)

        # Load the stored table and compare it with the uploaded one
        table_name = created.get_data_frame_table_name()
        stored_df = pandas.load_table(table_name)
        self.compare_tables(uploaded_df, stored_df)
Ejemplo n.º 4
0
    def test_table_pandas_merge_to_outer_NaN(self):
        """Outer-merge ``self.src_df2`` and compare with a local pandas merge.

        After unsetting the key flag on ``age`` and ``email`` and dropping
        the ``registered`` column, the expected result is computed locally
        with ``pd.merge``. The same merge is then requested through
        ``table:api_pmerge``; the response must carry no error detail, the
        workflow must report four rows and eight columns, and the stored
        table must match the locally merged frame.
        """
        # Use the first workflow in the DB (the fixture one)
        workflow = models.Workflow.objects.all()[0]

        # Unset the key flag on these columns before merging.
        # NOTE(review): presumably the outer merge leaves them with missing
        # values, which would break the key property -- confirm.
        for column_name in ('age', 'email'):
            column = workflow.columns.filter(name=column_name)[0]
            column.is_key = False
            column.save()

        # Drop the column with booleans because the data type is lost
        delete_column(self.user, workflow,
                      workflow.columns.get(name='registered'))

        # Build the data frame that will be merged
        r_df = pd.DataFrame(self.src_df2)

        # Compute the expected result locally, before the API call changes
        # the stored table
        dframe = pandas.load_table(workflow.get_data_frame_table_name())
        new_df = pd.merge(dframe,
                          r_df,
                          how="outer",
                          left_on="sid",
                          right_on="sid")

        # Send the merge request through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": serializers.df_to_string(r_df),
                "how": "outer",
                "left_on": "sid",
                "right_on": "sid",
            },
            format='json')

        # No anomaly should be detected; makes a rejected merge fail here
        # with a clear cause instead of through the size checks below
        self.assertIsNone(response.data.get('detail'))

        # Re-read the workflow to pick up the changes made by the request
        workflow = models.Workflow.objects.all()[0]

        # The outer merge must leave four rows and eight columns
        self.assertEqual(workflow.nrows, 4)
        self.assertEqual(workflow.ncols, 8)

        # Load the stored table again
        dframe = pandas.load_table(workflow.get_data_frame_table_name())

        # Compare both elements and check wf df consistency
        self.compare_tables(dframe, new_df)
Ejemplo n.º 5
0
    def test_table_pandas_merge_to_empty(self):
        """Check the error reported when an inner merge yields no rows.

        ``self.new_table`` is sent to ``table:api_pmerge`` with
        ``how="inner"`` joining on ``sid`` and the ``detail`` field of the
        response is compared with the expected error message.
        """
        # Use the first workflow in the DB (the fixture one)
        target = models.Workflow.objects.all()[0]

        # Build the data frame that will be merged
        incoming_df = pd.DataFrame(self.new_table)

        # Send the merge request through the API
        merge_url = reverse('table:api_pmerge', kwargs={'wid': target.id})
        merge_payload = {
            "src_df": serializers.df_to_string(incoming_df),
            "how": "inner",
            "left_on": "sid",
            "right_on": "sid",
        }
        response = self.client.put(merge_url, merge_payload, format='json')

        # The request must be rejected with this exact message
        expected_detail = (
            'Unable to perform merge operation: ' +
            'Merge operation produced a result with no rows')
        self.assertEqual(response.data['detail'], expected_detail)
Ejemplo n.º 6
0
    def test_table_pandas_update(self):
        """Replace the workflow table via the API and read it back.

        ``self.new_table`` is sent with a PUT request to ``table:api_pops``
        and the table stored for the workflow is compared with the uploaded
        data frame.
        """
        # Use the first workflow in the DB (the fixture one)
        target = models.Workflow.objects.all()[0]

        # Turn the new table into a data frame with date/time detection
        uploaded_df = pandas.detect_datetime_columns(
            pd.DataFrame(self.new_table))

        # Upload the new table
        upload_url = reverse('table:api_pops', kwargs={'wid': target.id})
        upload_payload = {'data_frame': serializers.df_to_string(uploaded_df)}
        self.client.put(upload_url, upload_payload, format='json')

        # Refresh the workflow (it has been updated by the upload)
        target = models.Workflow.objects.get(id=target.id)

        # Load the stored table and compare it with the uploaded one
        table_name = target.get_data_frame_table_name()
        stored_df = pandas.load_table(table_name)
        self.compare_tables(uploaded_df, stored_df)
Ejemplo n.º 7
0
    def test_serializer_pandas(self):
        """Check that a data frame survives a serialization round trip.

        A small frame with string, integer, float, boolean and date-like
        string columns is encoded with ``serializers.df_to_string``, decoded
        through ``DataFramePandasSerializer`` and compared with the original.
        """
        columns = {
            'key': ['k1', 'k2'],
            't1': ['t1', 't2'],
            'i2': [5, 6],
            'f3': [7.0, 8.0],
            'b4': [True, False],
            'd5': ['2018-10-11 21:12:04+10:30', '2018-10-12 21:12:04+10:30'],
        }
        original = pd.DataFrame(columns)

        # Encode the frame as a string
        encoded = serializers.df_to_string(original)

        # Decode it again through the serializer
        serializer = serializers.DataFramePandasSerializer(
            data={'data_frame': encoded},
            many=False,
        )
        self.assertTrue(serializer.is_valid())
        decoded = serializer.validated_data['data_frame']

        # The decoded frame must be equal to the one we started with
        self.assertTrue(original.equals(decoded))
Ejemplo n.º 8
0
    def test_table_pandas_merge_to_inner(self):
        """Inner-merge ``self.src_df`` into the workflow table through the API.

        Sends the data frame to the ``table:api_pmerge`` endpoint with
        ``how="inner"`` joining on ``sid``, then checks that the response
        carries no error detail and that the workflow reports two rows.
        """
        # Use the first workflow in the DB (the fixture one)
        workflow = models.Workflow.objects.all()[0]

        # Build the data frame that will be merged
        r_df = pd.DataFrame(self.src_df)

        # Send the merge request through the API
        response = self.client.put(
            reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
            {
                "src_df": serializers.df_to_string(r_df),
                "how": "inner",
                "left_on": "sid",
                "right_on": "sid",
            },
            format='json')

        # No anomaly should be detected; makes a rejected merge fail here
        # with a clear cause instead of through the row count below
        self.assertIsNone(response.data.get('detail'))

        # Get the updated object
        workflow = models.Workflow.objects.all()[0]

        # Result should have two rows
        self.assertEqual(workflow.nrows, 2)