def test_formula(self):
    """Render the formula module with a valid, a defaulted and a broken setup.

    Three scenarios are checked in sequence against the render endpoint:

    1. formula ``Amount*2`` written to a column named ``output``;
    2. the same formula with an empty result-column parameter, which is
       expected to land in a column named ``result``;
    3. a formula referencing an unknown column, which is expected to put
       the module into the ERROR state and render an empty table.
    """
    render_url = '/api/wfmodules/%d/render' % self.wfmodule.id

    def render_and_reload():
        # Hit the render endpoint, then re-read the module row so that the
        # status assertions below see what is stored in the database.
        reply = self.client.get(render_url)
        self.wfmodule.refresh_from_db()
        return reply

    # Scenario 1: double the Amount column into 'output'.
    self.fpval.value = 'Amount*2'
    self.fpval.save()
    self.rpval.value = 'output'
    self.rpval.save()
    expected = mock_csv_table.copy()
    # Multiply by 2.0 (not 2): the rendered column is floating point.
    expected['output'] = expected['Amount'] * 2.0

    reply = render_and_reload()
    self.assertEqual(self.wfmodule.status, WfModule.READY)
    self.assertEqual(reply.content, make_render_json(expected))

    # Scenario 2: an empty result parameter should produce 'result'.
    self.rpval.value = ''
    self.rpval.save()
    expected = mock_csv_table.copy()
    expected['result'] = expected['Amount'] * 2.0  # floating point again

    reply = render_and_reload()
    self.assertEqual(self.wfmodule.status, WfModule.READY)
    self.assertEqual(reply.content, make_render_json(expected))

    # Scenario 3: a formula naming a missing column should error out.
    self.fpval.value = 'xxx*2'
    self.fpval.save()

    reply = render_and_reload()
    self.assertEqual(self.wfmodule.status, WfModule.ERROR)
    self.assertEqual(reply.content, make_render_json(pd.DataFrame()))
Exemple #2
0
    def test_wf_module_histogram(self):
        """Exercise the ``/histogram/<column>`` endpoint of a wf module.

        Covers a module with no input (empty result), a module with input
        (per-``Class`` counts) and a request for a column that does not exist
        (HTTP 400).
        """
        # The column name for histogram counts, to prevent name conflicts
        INTERNAL_COUNT_COLNAME = '__internal_count_column__'

        # First module: no prior input, should be empty result
        response = self.client.get('/api/wfmodules/%d/histogram/Class' %
                                   self.wfmodule1.id)
        # Status codes are compared by value: whether two equal ints are the
        # same object is an interpreter detail, so assertIs is not reliable.
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(pd.DataFrame())
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # Second module: expected histogram is the row count per 'Class'
        # value, ordered by count (descending) and then by 'Class' (ascending)
        response = self.client.get('/api/wfmodules/%d/histogram/Class' %
                                   self.wfmodule2.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data = self.test_table.groupby('Class').size().reset_index()
        test_data.columns = ['Class', INTERNAL_COUNT_COLNAME]
        test_data = test_data.sort_values(by=[INTERNAL_COUNT_COLNAME, 'Class'],
                                          ascending=[False, True])
        test_data_json = make_render_json(test_data)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # Test for non-existent column; should return a 400 (bad request) code
        response = self.client.get('/api/wfmodules/%d/histogram/O' %
                                   self.wfmodule2.id)
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    def test_load_xlsx(self):
        """Fetch an XLSX file over (mocked) HTTP and check the rendered table.

        A well-formed workbook must render identically to what
        ``pd.read_excel`` yields for the same file; a body that is not XLSX
        must leave the module in the ERROR state.
        """
        url = 'http://test.com/the.xlsx'
        self.url_pval.set_value(url)
        self.url_pval.save()

        # Read the fixture through a context manager so the file handle is
        # closed deterministically instead of being left to the garbage
        # collector.
        with open(mock_xlsx_path, "rb") as xlsx_file:
            xlsx_bytes = xlsx_file.read()
        xlsx_table = pd.read_excel(mock_xlsx_path)

        # success case
        with requests_mock.Mocker() as m:
            m.get(
                url,
                content=xlsx_bytes,
                headers={
                    'content-type':
                    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
                })
            self.press_fetch_button()
            response = self.get_render()
            self.assertEqual(response.content.decode('utf-8'),
                             make_render_json(xlsx_table))

        # malformed file should put module in error state
        with requests_mock.Mocker() as m:
            m.get(
                url,
                content=b"there's just no way this is xlsx",
                headers={
                    'content-type':
                    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
                })
            self.press_fetch_button()
            self.wfmodule.refresh_from_db()
            self.assertEqual(self.wfmodule.status, WfModule.ERROR)
    def test_load_json(self):
        """Fetch a nested JSON document over (mocked) HTTP and render it.

        The expected table is built locally from the same fixture file and
        passed through ``sanitize_dataframe`` before comparison. A body that
        is not JSON must leave the module in the ERROR state.
        """
        url = 'http://test.com/the.json'
        self.url_pval.set_value(url)
        self.url_pval.save()

        # use a complex example with nested data
        fname = os.path.join(settings.BASE_DIR,
                             'server/tests/test_data/sfpd.json')
        # Context manager closes the fixture file promptly; the previous
        # open(...).read() left the handle to the garbage collector.
        with open(fname) as json_file:
            sfpd_json = json_file.read()
        sfpd_table = pd.DataFrame(
            json.loads(sfpd_json, object_pairs_hook=OrderedDict)
        )  # OrderedDict otherwise cols get sorted
        # Return value is ignored, so this presumably sanitizes in place --
        # NOTE(review): confirm against sanitize_dataframe.
        sanitize_dataframe(sfpd_table)

        # success case
        with requests_mock.Mocker() as m:
            m.get(url,
                  text=sfpd_json,
                  headers={'content-type': 'application/json'})
            self.press_fetch_button()
            response = self.get_render()
            self.assertEqual(response.content.decode('utf-8'),
                             make_render_json(sfpd_table))

        # malformed json should put module in error state
        with requests_mock.Mocker() as m:
            m.get(url,
                  text="there's just no way this is json",
                  headers={'content-type': 'application/json'})
            self.press_fetch_button()
            self.wfmodule.refresh_from_db()
            self.assertEqual(self.wfmodule.status, WfModule.ERROR)
Exemple #5
0
    def test_load_json(self):
        """Load JSON over (mocked) HTTP, with and without a JSON path.

        Four fetches are performed in order: a plain success, a malformed
        body, a success that uses the JSON-path parameter, and a bad JSON
        path. Successes must render ``mock_json_table``; failures must leave
        the module in the ERROR state.
        """
        json_url = 'http://test.com/the.json'
        self.url_pval.set_value(json_url)
        self.url_pval.save()

        def register(mocker, body):
            # Serve `body` as application/json for the configured URL.
            mocker.get(json_url,
                       text=body,
                       headers={'content-type': 'application/json'})

        def set_json_path(path):
            # Store a new value in the JSON-path parameter.
            self.path_pval.set_value(path)
            self.path_pval.save()

        def expect_mock_table():
            # Fetch, render, and compare against the reference table.
            self.press_fetch_button()
            rendered = self.get_render()
            self.assertEqual(rendered.content,
                             make_render_json(mock_json_table))

        def expect_error_status():
            # Fetch, then check the stored module status is ERROR.
            self.press_fetch_button()
            self.wfmodule.refresh_from_db()
            self.assertEqual(self.wfmodule.status, WfModule.ERROR)

        # 1. Plain success.
        with requests_mock.Mocker() as mocker:
            register(mocker, mock_json_text)
            expect_mock_table()

        # 2. Malformed JSON puts the module in the error state.
        with requests_mock.Mocker() as mocker:
            register(mocker, "there's just no way this is json")
            expect_error_status()

        # 3. Success when the table is reached through a JSON path.
        with requests_mock.Mocker() as mocker:
            set_json_path(mock_json_path)
            register(mocker, mock_json_path_text)
            expect_mock_table()

        # 4. A bad JSON path puts the module in the error state.
        with requests_mock.Mocker() as mocker:
            set_json_path('hilarious')
            register(mocker, mock_json_path_text)
            expect_error_status()
Exemple #6
0
    def test_make_render_json(self):
        """Check ``make_render_json`` output for the basic test table.

        Also runs two regression inputs that previously made JSON conversion
        raise ``ValueError``; for those, completing without an exception is
        the whole check.
        """
        # test our basic test data
        output = make_render_json(self.test_table)
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(output, str)
        d1 = json.loads(output)
        d2 = {
            'total_rows':
            4,
            'start_row':
            0,
            'end_row':
            4,
            'columns': ['Class', 'M', 'F'],
            'rows': [{
                'Class': 'math',
                'F': 12,
                'M': 10.0
            }, {
                'Class': 'english',
                'F': 7,
                'M': None
            }, {
                'Class': 'history',
                'F': 13,
                'M': 11.0
            }, {
                'Class': 'economics',
                'F': 20,
                'M': 20.0
            }],
            'column_types': ['String', 'Number', 'Number']
        }
        self.assertEqual(d1, d2)

        # Test some json conversion gotchas we encountered during development

        # simple test case where Pandas produces int64 column type, and json conversion throws ValueError
        # see https://github.com/pandas-dev/pandas/issues/13258#issuecomment-326671257
        int64csv = 'A,B,C,D\n1,2,3,4'
        int64table = pd.read_csv(io.StringIO(int64csv), header=0)
        make_render_json(int64table)  # must not raise

        # When no header row, Pandas uses int64s as column names, and json.dumps(list(table)) throws ValueError
        int64table = pd.read_csv(io.StringIO(int64csv), header=None)
        make_render_json(int64table)  # must not raise
Exemple #7
0
    def test_wf_module_input(self):
        """Check the ``/input`` endpoint for three chained wf modules.

        The first module has no upstream input (empty table); the second and
        third are both expected to receive the test table as input.
        """
        # First module: no prior input, should be empty result
        response = self.client.get('/api/wfmodules/%d/input' %
                                   self.wfmodule1.id)
        # Compare status codes by value; int object identity (assertIs) is an
        # interpreter implementation detail and not a safe equality check.
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(pd.DataFrame())
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # Second module: input should be test data produced by first module
        response = self.client.get('/api/wfmodules/%d/input' %
                                   self.wfmodule2.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(self.test_table)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # Third module: should be same as second, as second module is NOP
        response = self.client.get('/api/wfmodules/%d/input' %
                                   self.wfmodule3.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)
Exemple #8
0
    def test_wf_module_render_get(self):
        """Check the ``/render`` endpoint, including row-range query params.

        Verifies the rendered output of three chained modules, then the
        ``startrow`` / ``endrow`` parameters on the first module: partial
        ranges, out-of-bounds values (expected to clip to the full table) and
        a non-numeric value (expected HTTP 400).
        """
        # First module: creates test data
        response = self.client.get('/api/wfmodules/%d/render' %
                                   self.wfmodule1.id)
        # Status codes are compared by value throughout. assertIs checks
        # object identity, which is not guaranteed for equal ints (400 lies
        # outside CPython's small-integer cache).
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(self.test_table)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # second module: NOP
        response = self.client.get('/api/wfmodules/%d/render' %
                                   self.wfmodule2.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # Third module: doubles M column
        response = self.client.get('/api/wfmodules/%d/render' %
                                   self.wfmodule3.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        double_test_data = self.test_table.copy()
        double_test_data['M'] *= 2
        # Separate name for the JSON form so one variable is not both a
        # DataFrame and a string.
        double_test_data_json = make_render_json(double_test_data)
        self.assertEqual(response.content.decode('utf-8'),
                         double_test_data_json)

        # Now test retrieving specified rows only
        response = self.client.get('/api/wfmodules/%d/render?startrow=1' %
                                   self.wfmodule1.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(self.test_table, startrow=1)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        response = self.client.get(
            '/api/wfmodules/%d/render?startrow=1&endrow=3' % self.wfmodule1.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(self.test_table,
                                          startrow=1,
                                          endrow=3)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        response = self.client.get('/api/wfmodules/%d/render?endrow=3' %
                                   self.wfmodule1.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(self.test_table, endrow=3)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # index out of bounds should clip
        response = self.client.get(
            '/api/wfmodules/%d/render?startrow=-1&endrow=500' %
            self.wfmodule1.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        test_data_json = make_render_json(self.test_table)
        self.assertEqual(response.content.decode('utf-8'), test_data_json)

        # index not a number -> bad request
        response = self.client.get(
            '/api/wfmodules/%d/render?startrow=0&endrow=frog' %
            self.wfmodule1.id)
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
Exemple #9
0
    def test_missing_module(self):
        """A WfModule without a backing Module yields a placeholder.

        The detail endpoint must report a module named 'Missing module' with
        ``loads_data`` False, and the render endpoint must return an empty
        table.
        """
        # If the WfModule references a Module that does not exist, we should get a placeholder
        workflow = add_new_workflow('Missing module')
        # NOTE(review): the None argument presumably stands for "no module
        # version" -- confirm against add_new_wf_module.
        wfm = add_new_wf_module(workflow, None, 0)
        response = self.client.get('/api/wfmodules/%d/' % wfm.id)
        # Compare status codes by value; assertIs would test int object
        # identity, which is an interpreter implementation detail.
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.data['module_version']['module']['name'],
                         'Missing module')
        self.assertEqual(
            response.data['module_version']['module']['loads_data'], False)

        response = self.client.get('/api/wfmodules/%d/render' % wfm.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        empty_table = make_render_json(pd.DataFrame())
        self.assertEqual(response.content.decode('utf-8'), empty_table)
Exemple #10
0
    def test_load_csv(self):
        """Fetch CSV over (mocked) HTTP and track data versions and deltas.

        Sequence: nothing stored initially; a first fetch stores a data
        version and a workflow delta; an identical re-fetch keeps both but
        changes the last-update-check time; a fetch of different data changes
        all three; a malformed CSV leaves the module in the ERROR state.
        """
        csv_url = 'http://test.com/the.csv'
        self.url_pval.set_value(csv_url)
        self.url_pval.save()

        def register(mocker, body):
            # Serve `body` as text/csv for the configured URL.
            mocker.get(csv_url,
                       text=body,
                       headers={'content-type': 'text/csv'})

        def reload_module_and_workflow():
            # Re-read the module row first, then its workflow row.
            self.wfmodule.refresh_from_db()
            self.wfmodule.workflow.refresh_from_db()

        # Before any fetch: no stored data and no Deltas on the workflow.
        self.assertIsNone(self.wfmodule.get_fetched_data_version())
        self.assertIsNone(self.wfmodule.retrieve_fetched_table())
        self.assertIsNone(self.wfmodule.workflow.last_delta)

        # First fetch succeeds and renders the reference table.
        with requests_mock.Mocker() as mocker:
            register(mocker, mock_csv_text)
            self.press_fetch_button()
            rendered = self.get_render()
            self.assertEqual(rendered.content.decode('utf-8'),
                             make_render_json(mock_csv_table))

            # A new data version and a delta recording the change must exist.
            reload_module_and_workflow()
            initial_version = self.wfmodule.get_fetched_data_version()
            initial_delta = self.wfmodule.workflow.last_delta
            initial_check = self.wfmodule.last_update_check
            self.assertIsNotNone(initial_version)
            self.assertIsNotNone(initial_delta)

        # Identical data again: same version and delta, new check time.
        with requests_mock.Mocker() as mocker:
            register(mocker, mock_csv_text)
            self.press_fetch_button()

            reload_module_and_workflow()
            self.assertEqual(self.wfmodule.get_fetched_data_version(),
                             initial_version)
            self.assertEqual(self.wfmodule.workflow.last_delta, initial_delta)
            repeat_check = self.wfmodule.last_update_check
            self.assertNotEqual(repeat_check, initial_check)

        # Different data: new version, new delta, new check time.
        with requests_mock.Mocker() as mocker:
            register(mocker, mock_csv_text2)
            self.press_fetch_button()
            rendered = self.get_render()
            self.assertEqual(rendered.content.decode('utf-8'),
                             make_render_json(mock_csv_table2))

            reload_module_and_workflow()
            self.assertNotEqual(self.wfmodule.get_fetched_data_version(),
                                initial_version)
            self.assertNotEqual(self.wfmodule.workflow.last_delta,
                                initial_delta)
            self.assertNotEqual(self.wfmodule.last_update_check, repeat_check)

        # Malformed CSV should put the module in the error state.
        with requests_mock.Mocker() as mocker:
            register(mocker, 'a,b\n"1')
            self.press_fetch_button()
            self.wfmodule.refresh_from_db()
            self.assertEqual(self.wfmodule.status, WfModule.ERROR)
 def test_csv(self):
     """Rendered output must equal the JSON form of the parsed mock CSV."""
     render_url = '/api/wfmodules/%d/render' % self.wf_module.id
     reply = self.client.get(render_url)
     # Expected table is parsed straight from the mock CSV text.
     expected = pd.read_csv(io.StringIO(mock_csv_text))
     self.assertEqual(reply.content, make_render_json(expected))