def test_formula(self):
    """Render the formula module with a valid formula, a blank output name, and an unknown column."""
    render_url = '/api/wfmodules/%d/render' % self.wfmodule.id

    def render_then_refresh():
        # Fetch the rendered output, then reload the module so its status is current.
        rendered = self.client.get(render_url)
        self.wfmodule.refresh_from_db()
        return rendered

    # Formula doubling the Amount column, written to an explicitly named column.
    self.fpval.value = 'Amount*2'
    self.fpval.save()
    self.rpval.value = 'output'
    self.rpval.save()

    expected = mock_csv_table.copy()
    # Multiply by 2.0 (not 2) because the output is going to be floating point.
    expected['output'] = expected['Amount'] * 2.0
    response = render_then_refresh()
    self.assertEqual(self.wfmodule.status, WfModule.READY)
    self.assertEqual(response.content, make_render_json(expected))

    # An empty result parameter should produce a column named 'result'.
    self.rpval.value = ''
    self.rpval.save()

    expected = mock_csv_table.copy()
    expected['result'] = expected['Amount'] * 2.0
    response = render_then_refresh()
    self.assertEqual(self.wfmodule.status, WfModule.READY)
    self.assertEqual(response.content, make_render_json(expected))

    # A formula naming a missing column should put the module in the error
    # state and render an empty table.
    self.fpval.value = 'xxx*2'
    self.fpval.save()

    response = render_then_refresh()
    self.assertEqual(self.wfmodule.status, WfModule.ERROR)
    self.assertEqual(response.content, make_render_json(pd.DataFrame()))
def test_wf_module_histogram(self):
    """Check the histogram endpoint for an empty input, a real column, and a missing column.

    Status codes are compared with ``assertEqual``: ``assertIs`` tests object
    identity, which is not a reliable way to compare integers.
    """
    # The column name for histogram counts, to prevent name conflicts
    INTERNAL_COUNT_COLNAME = '__internal_count_column__'

    # First module: no prior input, should be empty result
    response = self.client.get('/api/wfmodules/%d/histogram/Class' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(pd.DataFrame())
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # Second module: expected histogram is the row count per 'Class' value,
    # sorted by count (descending) and then by class name (ascending)
    response = self.client.get('/api/wfmodules/%d/histogram/Class' % self.wfmodule2.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data = self.test_table.groupby('Class').size().reset_index()
    test_data.columns = ['Class', INTERNAL_COUNT_COLNAME]
    test_data = test_data.sort_values(
        by=[INTERNAL_COUNT_COLNAME, 'Class'], ascending=[False, True])
    test_data_json = make_render_json(test_data)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # Test for non-existent column; should return a 400 Bad Request
    response = self.client.get('/api/wfmodules/%d/histogram/O' % self.wfmodule2.id)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_load_xlsx(self):
    """Fetch an XLSX file from a mocked URL; a malformed file must put the module in error state."""
    url = 'http://test.com/the.xlsx'
    self.url_pval.set_value(url)
    self.url_pval.save()

    # Read the fixture through a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(mock_xlsx_path, "rb") as xlsx_file:
        xlsx_bytes = xlsx_file.read()
    xlsx_table = pd.read_excel(mock_xlsx_path)

    # success case
    with requests_mock.Mocker() as m:
        m.get(
            url,
            content=xlsx_bytes,
            headers={
                'content-type':
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            })
        self.press_fetch_button()
        response = self.get_render()
        self.assertEqual(response.content.decode('utf-8'), make_render_json(xlsx_table))

    # malformed file should put module in error state
    with requests_mock.Mocker() as m:
        m.get(
            url,
            content=b"there's just no way this is xlsx",
            headers={
                'content-type':
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            })
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
def test_load_json(self):
    """Fetch nested JSON from a mocked URL; malformed JSON must put the module in error state."""
    url = 'http://test.com/the.json'
    self.url_pval.set_value(url)
    self.url_pval.save()

    # use a complex example with nested data
    fname = os.path.join(settings.BASE_DIR, 'server/tests/test_data/sfpd.json')
    # Context manager guarantees the fixture file is closed after reading.
    with open(fname) as json_file:
        sfpd_json = json_file.read()
    # OrderedDict otherwise cols get sorted
    sfpd_table = pd.DataFrame(json.loads(sfpd_json, object_pairs_hook=OrderedDict))
    sanitize_dataframe(sfpd_table)

    # success case
    with requests_mock.Mocker() as m:
        m.get(url, text=sfpd_json, headers={'content-type': 'application/json'})
        self.press_fetch_button()
        response = self.get_render()
        self.assertEqual(response.content.decode('utf-8'), make_render_json(sfpd_table))

    # malformed json should put module in error state
    with requests_mock.Mocker() as m:
        m.get(
            url,
            text="there's just no way this is json",
            headers={'content-type': 'application/json'})
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
def test_load_json(self):
    """Fetch JSON from a mocked URL, with and without a JSON path, covering the error cases."""
    # NOTE(review): another test_load_json is defined in this source; if both
    # live in the same class, the later definition shadows the earlier one.
    url = 'http://test.com/the.json'
    json_headers = {'content-type': 'application/json'}
    self.url_pval.set_value(url)
    self.url_pval.save()

    # Plain JSON document: render must match the reference table.
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text=mock_json_text, headers=json_headers)
        self.press_fetch_button()
        rendered = self.get_render()
        self.assertEqual(rendered.content, make_render_json(mock_json_table))

    # Body that is not JSON at all: module should end up in error state.
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text="there's just no way this is json", headers=json_headers)
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)

    # Valid JSON path into the path fixture: render must match the reference table.
    with requests_mock.Mocker() as mocker:
        self.path_pval.set_value(mock_json_path)
        self.path_pval.save()
        mocker.get(url, text=mock_json_path_text, headers=json_headers)
        self.press_fetch_button()
        rendered = self.get_render()
        self.assertEqual(rendered.content, make_render_json(mock_json_table))

    # A bad JSON path should put the module in error state.
    with requests_mock.Mocker() as mocker:
        self.path_pval.set_value('hilarious')
        self.path_pval.save()
        mocker.get(url, text=mock_json_path_text, headers=json_headers)
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
def test_make_render_json(self):
    """Check make_render_json output for the basic test table and for int64 edge cases."""
    # test our basic test data
    output = make_render_json(self.test_table)
    # assertIsInstance reports the actual type on failure, unlike assertTrue(isinstance(...))
    self.assertIsInstance(output, str)
    d1 = json.loads(output)
    d2 = {
        'total_rows': 4,
        'start_row': 0,
        'end_row': 4,
        'columns': ['Class', 'M', 'F'],
        'rows': [
            {'Class': 'math', 'F': 12, 'M': 10.0},
            {'Class': 'english', 'F': 7, 'M': None},
            {'Class': 'history', 'F': 13, 'M': 11.0},
            {'Class': 'economics', 'F': 20, 'M': 20.0},
        ],
        'column_types': ['String', 'Number', 'Number'],
    }
    self.assertEqual(d1, d2)

    # Test some json conversion gotchas we encountered during development.
    # The calls below only need to complete without raising, so their
    # return values are intentionally discarded.

    # simple test case where Pandas produces int64 column type, and json conversion throws ValueError
    # see https://github.com/pandas-dev/pandas/issues/13258#issuecomment-326671257
    int64csv = 'A,B,C,D\n1,2,3,4'
    int64table = pd.read_csv(io.StringIO(int64csv), header=0)
    make_render_json(int64table)

    # When no header row, Pandas uses int64s as column names, and json.dumps(list(table)) throws ValueError
    int64table = pd.read_csv(io.StringIO(int64csv), header=None)
    make_render_json(int64table)
def test_wf_module_input(self):
    """Check the input endpoint for each of the three modules in the workflow.

    Status codes are compared with ``assertEqual``: ``assertIs`` tests object
    identity, which is not a reliable way to compare integers.
    """
    # First module: no prior input, should be empty result
    response = self.client.get('/api/wfmodules/%d/input' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(pd.DataFrame())
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # Second module: input should be test data produced by first module
    response = self.client.get('/api/wfmodules/%d/input' % self.wfmodule2.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(self.test_table)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # Third module: should be same as second, as second module is NOP
    response = self.client.get('/api/wfmodules/%d/input' % self.wfmodule3.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)
def test_wf_module_render_get(self):
    """Check the render endpoint for each module, plus row-range query parameters.

    Status codes are compared with ``assertEqual``: ``assertIs`` tests object
    identity, which is not a reliable way to compare integers (400 in
    particular lies outside CPython's small-integer cache).
    """
    # First module: creates test data
    response = self.client.get('/api/wfmodules/%d/render' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(self.test_table)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # second module: NOP
    response = self.client.get('/api/wfmodules/%d/render' % self.wfmodule2.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # Third module: doubles M column
    response = self.client.get('/api/wfmodules/%d/render' % self.wfmodule3.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    double_test_data = self.test_table.copy()
    double_test_data['M'] *= 2
    double_test_data = make_render_json(double_test_data)
    self.assertEqual(response.content.decode('utf-8'), double_test_data)

    # Now test retrieving specified rows only
    response = self.client.get(
        '/api/wfmodules/%d/render?startrow=1' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(self.test_table, startrow=1)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    response = self.client.get(
        '/api/wfmodules/%d/render?startrow=1&endrow=3' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(self.test_table, startrow=1, endrow=3)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    response = self.client.get(
        '/api/wfmodules/%d/render?endrow=3' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(self.test_table, endrow=3)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # index out of bounds should clip
    response = self.client.get(
        '/api/wfmodules/%d/render?startrow=-1&endrow=500' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    test_data_json = make_render_json(self.test_table)
    self.assertEqual(response.content.decode('utf-8'), test_data_json)

    # index not a number -> bad request
    response = self.client.get(
        '/api/wfmodules/%d/render?startrow=0&endrow=frog' % self.wfmodule1.id)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_missing_module(self):
    """A WfModule whose Module does not exist should serialize as a placeholder and render empty.

    Status codes are compared with ``assertEqual``: ``assertIs`` tests object
    identity, which is not a reliable way to compare integers.
    """
    # If the WfModule references a Module that does not exist, we should get a placeholder
    workflow = add_new_workflow('Missing module')
    wfm = add_new_wf_module(workflow, None, 0)

    response = self.client.get('/api/wfmodules/%d/' % wfm.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data['module_version']['module']['name'], 'Missing module')
    self.assertEqual(response.data['module_version']['module']['loads_data'], False)

    # Rendering the placeholder should yield an empty table
    response = self.client.get('/api/wfmodules/%d/render' % wfm.id)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    empty_table = make_render_json(pd.DataFrame())
    self.assertEqual(response.content.decode('utf-8'), empty_table)
def test_load_csv(self):
    """Fetch CSV from a mocked URL and track data versions, deltas, and check times across fetches."""
    url = 'http://test.com/the.csv'
    csv_headers = {'content-type': 'text/csv'}
    self.url_pval.set_value(url)
    self.url_pval.save()

    # Nothing fetched yet: no stored data and no Deltas on the workflow.
    self.assertIsNone(self.wfmodule.get_fetched_data_version())
    self.assertIsNone(self.wfmodule.retrieve_fetched_table())
    self.assertIsNone(self.wfmodule.workflow.last_delta)

    # First successful fetch.
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text=mock_csv_text, headers=csv_headers)
        self.press_fetch_button()
        rendered = self.get_render()
        self.assertEqual(rendered.content.decode('utf-8'), make_render_json(mock_csv_table))

        # The fetch should create a new data version on the WfModule and a
        # new delta representing the change.
        self.wfmodule.refresh_from_db()
        self.wfmodule.workflow.refresh_from_db()
        initial_version = self.wfmodule.get_fetched_data_version()
        initial_delta = self.wfmodule.workflow.last_delta
        initial_check_time = self.wfmodule.last_update_check
        self.assertIsNotNone(initial_version)
        self.assertIsNotNone(initial_delta)

    # Fetching exactly the same data should not create a new data version
    # or delta, but should update the check time.
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text=mock_csv_text, headers=csv_headers)
        self.press_fetch_button()

        self.wfmodule.refresh_from_db()
        self.wfmodule.workflow.refresh_from_db()
        self.assertEqual(self.wfmodule.get_fetched_data_version(), initial_version)
        self.assertEqual(self.wfmodule.workflow.last_delta, initial_delta)
        repeat_check_time = self.wfmodule.last_update_check
        self.assertNotEqual(repeat_check_time, initial_check_time)

    # Fetching different data should create a new data version and delta.
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text=mock_csv_text2, headers=csv_headers)
        self.press_fetch_button()
        rendered = self.get_render()
        self.assertEqual(rendered.content.decode('utf-8'), make_render_json(mock_csv_table2))

        self.wfmodule.refresh_from_db()
        self.wfmodule.workflow.refresh_from_db()
        self.assertNotEqual(self.wfmodule.get_fetched_data_version(), initial_version)
        self.assertNotEqual(self.wfmodule.workflow.last_delta, initial_delta)
        self.assertNotEqual(self.wfmodule.last_update_check, repeat_check_time)

    # Malformed CSV should put the module in error state.
    with requests_mock.Mocker() as mocker:
        mocker.get(url, text='a,b\n"1', headers=csv_headers)
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
def test_csv(self):
    """The module's render output should equal the mock CSV text parsed by pandas."""
    render_url = '/api/wfmodules/%d/render' % self.wf_module.id
    response = self.client.get(render_url)

    # Build the reference table from the same CSV text.
    csv_buffer = io.StringIO(mock_csv_text)
    expected_table = pd.read_csv(csv_buffer)
    self.assertEqual(response.content, make_render_json(expected_table))