def test_table_pandas_merge_to_left(self):
    """Left-merge ``src_df`` into the fixture workflow through the API.

    The workflow must keep three rows, and the ``newcol`` value stored for
    ``sid`` 1 must equal the first ``newcol`` entry of ``self.src_df``.
    """
    # The fixture contains a single workflow
    wflow = models.Workflow.objects.all()[0]

    # Data frame that is sent (serialized as a string) to be merged
    right_df = pd.DataFrame(self.src_df)

    # Run the merge through the API
    merge_url = reverse('table:api_pmerge', kwargs={'wid': wflow.id})
    payload = {
        "src_df": serializers.df_to_string(right_df),
        "how": "left",
        "left_on": "sid",
        "right_on": "sid",
    }
    self.client.put(merge_url, payload, format='json')

    # Read the workflow again to pick up the stored changes
    wflow = models.Workflow.objects.all()[0]

    # Same three rows as the initial data frame
    self.assertEqual(wflow.nrows, 3)

    # Check the merged column value for the row with sid == 1
    stored_df = pandas.load_table(wflow.get_data_frame_table_name())
    sid_one_rows = stored_df[stored_df['sid'] == 1]
    self.assertEqual(
        sid_one_rows['newcol'].values[0],
        self.src_df['newcol'][0])
def test_table_pandas_merge_to_outer(self):
    """Outer-merge ``src_df`` into the fixture workflow through the API.

    The response must carry no ``detail`` entry, and the workflow must end
    up with four rows.
    """
    # Get the only workflow in the fixture
    workflow = models.Workflow.objects.all()[0]

    # Clear the key flag of the age and email columns before merging.
    # NOTE(review): presumably required because the rows added by the outer
    # merge would break the key property of these columns -- confirm.
    for column_name in ('age', 'email'):
        column = workflow.columns.filter(name=column_name)[0]
        column.is_key = False
        column.save()

    # Transform new table into string
    r_df = pd.DataFrame(self.src_df)

    # Send the merge request through the API
    response = self.client.put(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
        {
            "src_df": serializers.df_to_string(r_df),
            "how": "outer",
            "left_on": "sid",
            "right_on": "sid",
        },
        format='json')

    # No anomaly should be detected
    self.assertIsNone(response.data.get('detail'))

    # Get the new workflow
    workflow = models.Workflow.objects.all()[0]

    # The outer merge is expected to leave four rows in the workflow
    self.assertEqual(workflow.nrows, 4)
def test_table_pandas_create(self):
    """Upload ``new_table`` into a freshly created workflow and read it back."""
    # Create a second workflow through the API
    creation = self.client.post(
        reverse('workflow:api_workflows'),
        {'name': tests.wflow_name + '2', 'attributes': {'one': 'two'}},
        format='json')

    # Fetch the workflow that was just created
    wflow = models.Workflow.objects.get(id=creation.data['id'])

    # Build the data frame to upload, passed through detect_datetime_columns
    upload_df = pandas.detect_datetime_columns(pd.DataFrame(self.new_table))

    # Upload the table as a serialized string
    upload_url = reverse('table:api_pops', kwargs={'wid': wflow.id})
    self.client.post(
        upload_url,
        {'data_frame': serializers.df_to_string(upload_df)},
        format='json')

    # Re-read the workflow (it has been updated) and load its stored table
    wflow = models.Workflow.objects.get(id=wflow.id)
    stored_df = pandas.load_table(wflow.get_data_frame_table_name())

    # Uploaded and stored tables must match
    self.compare_tables(upload_df, stored_df)
def test_table_pandas_merge_to_outer_NaN(self):
    """Outer-merge ``src_df2`` through the API and compare with pd.merge.

    The same merge is computed locally with pandas beforehand; the table
    stored after the API call must match it and have 4 rows and 8 columns.
    """
    # The fixture contains a single workflow
    wflow = models.Workflow.objects.all()[0]

    # Clear the key flag of the age and email columns
    for col_name in ('age', 'email'):
        col = wflow.columns.filter(name=col_name)[0]
        col.is_key = False
        col.save()

    # Drop the column with booleans because the data type is lost
    delete_column(self.user, wflow, wflow.columns.get(name='registered'))

    # Data frame to be merged
    right_df = pd.DataFrame(self.src_df2)

    # Merge options shared by the local merge and the API request
    merge_options = {"how": "outer", "left_on": "sid", "right_on": "sid"}

    # Compute the expected result from the table currently stored
    current_df = pandas.load_table(wflow.get_data_frame_table_name())
    expected_df = pd.merge(current_df, right_df, **merge_options)

    # Perform the same merge through the API
    merge_url = reverse('table:api_pmerge', kwargs={'wid': wflow.id})
    payload = {"src_df": serializers.df_to_string(right_df), **merge_options}
    self.client.put(merge_url, payload, format='json')

    # Read the workflow again to pick up the stored changes
    wflow = models.Workflow.objects.all()[0]

    # Expected dimensions after the outer merge
    self.assertEqual(wflow.nrows, 4)
    self.assertEqual(wflow.ncols, 8)

    # The stored table must match the locally merged one
    stored_df = pandas.load_table(wflow.get_data_frame_table_name())
    self.compare_tables(stored_df, expected_df)
def test_table_pandas_merge_to_empty(self):
    """Inner-merge ``new_table`` and expect the "no rows" error detail."""
    # The fixture contains a single workflow
    wflow = models.Workflow.objects.all()[0]

    # Data frame that is sent (serialized as a string) to be merged
    right_df = pd.DataFrame(self.new_table)

    # Request the merge through the API
    merge_url = reverse('table:api_pmerge', kwargs={'wid': wflow.id})
    payload = {
        "src_df": serializers.df_to_string(right_df),
        "how": "inner",
        "left_on": "sid",
        "right_on": "sid",
    }
    response = self.client.put(merge_url, payload, format='json')

    # The response must report that the merge produced no rows
    expected_detail = (
        'Unable to perform merge operation: '
        'Merge operation produced a result with no rows')
    self.assertEqual(response.data['detail'], expected_detail)
def test_table_pandas_update(self):
    """Replace the fixture workflow table via PUT and read it back."""
    # The fixture contains a single workflow
    wflow = models.Workflow.objects.all()[0]

    # Build the data frame to upload, passed through detect_datetime_columns
    upload_df = pandas.detect_datetime_columns(pd.DataFrame(self.new_table))

    # Upload the new table as a serialized string
    upload_url = reverse('table:api_pops', kwargs={'wid': wflow.id})
    self.client.put(
        upload_url,
        {'data_frame': serializers.df_to_string(upload_df)},
        format='json')

    # Re-read the workflow (it has been updated) and load its stored table
    wflow = models.Workflow.objects.get(id=wflow.id)
    stored_df = pandas.load_table(wflow.get_data_frame_table_name())

    # Uploaded and stored tables must match
    self.compare_tables(upload_df, stored_df)
def test_serializer_pandas(self):
    """Check that a data frame survives a serialization round trip."""
    # One column per kind of value: text, integer, float, boolean and
    # date/time strings
    column_data = {
        'key': ['k1', 'k2'],
        't1': ['t1', 't2'],
        'i2': [5, 6],
        'f3': [7.0, 8.0],
        'b4': [True, False],
        'd5': ['2018-10-11 21:12:04+10:30', '2018-10-12 21:12:04+10:30'],
    }
    original_df = pd.DataFrame(column_data)

    # Encode the frame as a string and feed it to the serializer
    encoded = serializers.df_to_string(original_df)
    serializer = serializers.DataFramePandasSerializer(
        data={'data_frame': encoded},
        many=False,
    )

    # The payload must validate and decode to an equal data frame
    self.assertTrue(serializer.is_valid())
    decoded_df = serializer.validated_data['data_frame']
    self.assertTrue(original_df.equals(decoded_df))
def test_table_pandas_merge_to_inner(self):
    """Inner-merge ``src_df`` through the API; two rows must remain."""
    # The fixture contains a single workflow
    wflow = models.Workflow.objects.all()[0]

    # Data frame that is sent (serialized as a string) to be merged
    right_df = pd.DataFrame(self.src_df)

    # Run the merge through the API
    merge_url = reverse('table:api_pmerge', kwargs={'wid': wflow.id})
    payload = {
        "src_df": serializers.df_to_string(right_df),
        "how": "inner",
        "left_on": "sid",
        "right_on": "sid",
    }
    self.client.put(merge_url, payload, format='json')

    # Read the workflow again to pick up the stored changes
    wflow = models.Workflow.objects.all()[0]

    # The inner merge is expected to leave two rows
    self.assertEqual(wflow.nrows, 2)