def test_table_pandas_merge_to_left(self):
    """Left-merge ``self.src_df`` into the workflow through the merge API.

    Sends a PUT to ``table:api_pmerge`` with ``how="left"`` and ``sid`` as
    the key on both sides, then checks that:

    * the workflow reports 3 rows,
    * the ``newcol`` value stored for ``sid == 1`` equals the first
      ``newcol`` entry of ``self.src_df``,
    * ``pandas_db.check_wf_df`` accepts the workflow.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Data frame that travels, serialised, inside the request payload
    incoming = pd.DataFrame(self.src_df)

    merge_url = reverse('table:api_pmerge', kwargs={'pk': workflow.id})
    payload = {
        "src_df": serializers.df_to_string(incoming),
        "how": "left",
        "left_on": "sid",
        "right_on": "sid",
    }
    # The response itself is not inspected by this test
    self.client.put(merge_url, payload, format='json')

    # Re-read the workflow so the assertions see the state after the call
    workflow = Workflow.objects.all()[0]
    self.assertEqual(workflow.nrows, 3)

    stored = pandas_db.load_from_db(workflow.id)
    merged_value = stored[stored['sid'] == 1]['newcol'].values[0]
    self.assertEqual(merged_value, self.src_df['newcol'][0])

    # Workflow metadata and stored data frame must agree
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_merge_to_inner(self):
    """Inner-merge ``self.src_df`` into the workflow through the merge API.

    Sends a PUT to ``table:api_pmerge`` with ``how="inner"`` and ``sid`` as
    the key on both sides, then checks that the workflow reports 2 rows and
    that ``pandas_db.check_wf_df`` accepts it.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Data frame that travels, serialised, inside the request payload
    incoming = pd.DataFrame(self.src_df)

    merge_url = reverse('table:api_pmerge', kwargs={'pk': workflow.id})
    payload = {
        "src_df": serializers.df_to_string(incoming),
        "how": "inner",
        "left_on": "sid",
        "right_on": "sid",
    }
    # The response itself is not inspected by this test
    self.client.put(merge_url, payload, format='json')

    # Re-read the workflow so the assertions see the state after the call
    workflow = Workflow.objects.all()[0]
    self.assertEqual(workflow.nrows, 2)

    # Workflow metadata and stored data frame must agree
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_merge_to_outer(self):
    """Outer-merge ``self.src_df`` and expect the API to report an anomaly.

    Sends a PUT to ``table:api_pmerge`` with ``how="outer"`` and ``sid`` as
    the key on both sides.  The ``detail`` field of the response must
    contain both ``'New values in column'`` and ``'are not of type'``, the
    workflow must still report 3 rows, and ``pandas_db.check_wf_df`` must
    accept it.

    NOTE(review): a second method with this exact name appears later in
    this file.  If both live in the same class, the later definition
    replaces this one and this test never runs -- confirm and rename one
    of them if so.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Data frame that travels, serialised, inside the request payload
    incoming = pd.DataFrame(self.src_df)

    merge_url = reverse('table:api_pmerge', kwargs={'pk': workflow.id})
    payload = {
        "src_df": serializers.df_to_string(incoming),
        "how": "outer",
        "left_on": "sid",
        "right_on": "sid",
    }
    reply = self.client.put(merge_url, payload, format='json')

    # The anomaly message is matched on two fragments only
    self.assertIn('New values in column', reply.data['detail'])
    self.assertIn('are not of type', reply.data['detail'])

    # Re-read the workflow: the row count is expected to stay at 3
    workflow = Workflow.objects.all()[0]
    self.assertEqual(workflow.nrows, 3)

    # Workflow metadata and stored data frame must agree
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_create(self):
    """Create a new workflow and upload ``self.new_table`` as its table.

    A workflow is created with a POST to ``workflow:api_workflows``; the
    data frame built from ``self.new_table`` (after
    ``ops.detect_datetime_columns``) is then POSTed to ``table:api_pops``.
    The frame loaded back from the database must match the uploaded one
    according to ``self.compare_tables``, and ``pandas_db.check_wf_df``
    must accept the refreshed workflow.
    """
    # Create a second workflow through the API
    creation = self.client.post(
        reverse('workflow:api_workflows'),
        {
            'name': test.wflow_name + '2',
            'attributes': {'one': 'two'},
        },
        format='json')

    # Fetch the workflow that was just created, using the id in the reply
    workflow = Workflow.objects.get(pk=creation.data['id'])

    # Build the data frame to upload
    uploaded = ops.detect_datetime_columns(pd.DataFrame(self.new_table))

    # Upload the table; the response itself is not inspected
    table_url = reverse('table:api_pops', kwargs={'pk': workflow.id})
    self.client.post(
        table_url,
        {'data_frame': serializers.df_to_string(uploaded)},
        format='json')

    # What was stored must match what was sent
    stored = pandas_db.load_from_db(workflow.id)
    self.compare_tables(uploaded, stored)

    # Refresh the workflow (it has been updated) before the final
    # consistency check
    workflow = Workflow.objects.get(pk=workflow.id)
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_merge_to_empty(self):
    """Inner-merge ``self.new_table`` and expect the "no rows" message.

    Sends a PUT to ``table:api_pmerge`` with ``how="inner"``, ``sid`` as the
    key on both sides and ``dup_column="override"``.  The ``detail`` field
    of the response must be exactly
    ``'Merge operation produced a result with no rows'`` and
    ``pandas_db.check_wf_df`` must still accept the workflow.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Data frame that travels, serialised, inside the request payload
    incoming = pd.DataFrame(self.new_table)

    merge_url = reverse('table:api_pmerge', kwargs={'pk': workflow.id})
    payload = {
        "src_df": serializers.df_to_string(incoming),
        "how": "inner",
        "left_on": "sid",
        "right_on": "sid",
        "dup_column": "override",
    }
    reply = self.client.put(merge_url, payload, format='json')

    self.assertEqual(
        reply.data['detail'],
        'Merge operation produced a result with no rows')

    # Re-read the workflow, then verify workflow/data-frame consistency
    workflow = Workflow.objects.all()[0]
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_update(self):
    """Replace the workflow table with ``self.new_table`` via PUT.

    The data frame built from ``self.new_table`` (after
    ``ops.detect_datetime_columns``) is PUT to ``table:api_pops``.  The
    frame loaded back from the database must match the uploaded one
    according to ``self.compare_tables``, and ``pandas_db.check_wf_df``
    must accept the refreshed workflow.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Build the data frame to upload
    uploaded = ops.detect_datetime_columns(pd.DataFrame(self.new_table))

    # Upload the new table; the response itself is not inspected
    table_url = reverse('table:api_pops', kwargs={'pk': workflow.id})
    self.client.put(
        table_url,
        {'data_frame': serializers.df_to_string(uploaded)},
        format='json')

    # What was stored must match what was sent
    stored = pandas_db.load_from_db(workflow.id)
    self.compare_tables(uploaded, stored)

    # Refresh the workflow (it has been updated) before the final
    # consistency check
    workflow = Workflow.objects.get(pk=workflow.id)
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_merge_to_outer_NaN(self):
    """Outer-merge ``self.src_df2`` and compare with a local ``pd.merge``.

    Before merging, the ``age`` and ``email`` columns have ``is_key`` set
    to False and the ``registered`` column is deleted.  The expected frame
    is computed locally with ``pd.merge(..., how="outer")`` on ``sid``; the
    same merge is then requested with a PUT to ``table:api_pmerge``.  The
    workflow must end up with 4 rows and 8 columns, the stored frame must
    match the locally merged one according to ``self.compare_tables``, and
    ``pandas_db.check_wf_df`` must accept the workflow.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Clear the key flag on these two columns before merging
    for column_name in ('age', 'email'):
        column = workflow.columns.filter(name=column_name)[0]
        column.is_key = False
        column.save()

    # Drop the column with booleans because the data type is lost
    registered = Column.objects.get(workflow=workflow, name='registered')
    workflow_delete_column(workflow, registered)

    # Data frame that travels, serialised, inside the request payload
    incoming = pd.DataFrame(self.src_df2)

    # Expected result: merge the currently stored frame locally
    current = pandas_db.load_from_db(workflow.id)
    expected = pd.merge(
        current,
        incoming,
        how="outer",
        left_on="sid",
        right_on="sid")

    # Request the same merge through the API; the response is not inspected
    merge_url = reverse('table:api_pmerge', kwargs={'pk': workflow.id})
    payload = {
        "src_df": serializers.df_to_string(incoming),
        "how": "outer",
        "left_on": "sid",
        "right_on": "sid",
    }
    self.client.put(merge_url, payload, format='json')

    # Re-read the workflow: 4 rows and 8 columns are expected afterwards
    workflow = Workflow.objects.all()[0]
    self.assertEqual(workflow.nrows, 4)
    self.assertEqual(workflow.ncols, 8)

    # Stored frame must match the locally computed merge
    stored = pandas_db.load_from_db(workflow.id)
    self.compare_tables(stored, expected)

    # Workflow metadata and stored data frame must agree
    self.assertTrue(pandas_db.check_wf_df(workflow))
def test_table_pandas_merge_to_outer(self):
    """Outer-merge ``self.src_df`` once ``age``/``email`` are not keys.

    The ``age`` and ``email`` columns have ``is_key`` set to False, then a
    PUT is sent to ``table:api_pmerge`` with ``how="outer"`` and ``sid`` as
    the key on both sides.  The response must carry no ``detail`` entry
    (or a ``None`` one), the workflow must report 4 rows, and
    ``pandas_db.check_wf_df`` must accept it.

    NOTE(review): an earlier method in this file has this exact name.  If
    both live in the same class, this definition replaces the earlier one,
    which then never runs -- confirm and rename one of them if so.
    """
    # First workflow returned by the ORM (presumably the only one loaded
    # by the fixture -- the fixture is not visible from this method).
    workflow = Workflow.objects.all()[0]

    # Clear the key flag on these two columns before merging
    for column_name in ('age', 'email'):
        column = workflow.columns.filter(name=column_name)[0]
        column.is_key = False
        column.save()

    # Data frame that travels, serialised, inside the request payload
    incoming = pd.DataFrame(self.src_df)

    merge_url = reverse('table:api_pmerge', kwargs={'pk': workflow.id})
    payload = {
        "src_df": serializers.df_to_string(incoming),
        "how": "outer",
        "left_on": "sid",
        "right_on": "sid",
    }
    reply = self.client.put(merge_url, payload, format='json')

    # No anomaly should be reported
    self.assertEqual(None, reply.data.get('detail'))

    # Re-read the workflow: 4 rows are expected after the outer merge
    workflow = Workflow.objects.all()[0]
    self.assertEqual(workflow.nrows, 4)

    # Workflow metadata and stored data frame must agree
    self.assertTrue(pandas_db.check_wf_df(workflow))