コード例 #1
0
    def _test(self, table: pd.DataFrame, params: Dict[str, Any] = None,
              expected_table: pd.DataFrame = None,
              expected_error: str = ''):
        """Render Formula on `table` and compare against the expected result.

        Both the actual and the expected result have `sanitize_in_place()`
        called on them before comparison.

        Args:
            table: input table passed to `Formula.render`.
            params: keyword arguments forwarded to `P(...)`; `None` (the
                default) means no keyword arguments.
            expected_table: table the render is expected to produce; `None`
                (the default) means a fresh, empty DataFrame.
            expected_error: error message the render is expected to produce.
        """
        # Build defaults per call: a default evaluated in the signature is
        # created once and shared by every call, and `expected_table` is
        # handed to a ProcessResult that is then sanitized in place.
        if params is None:
            params = {}
        if expected_table is None:
            expected_table = pd.DataFrame()

        result = ProcessResult.coerce(Formula.render(P(**params), table))
        result.sanitize_in_place()

        expected = ProcessResult(expected_table, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
コード例 #2
0
def _module_dispatch_render_static(dispatch, params, table, fetch_result):
    """Call `dispatch.render` and return a post-processed ProcessResult.

    Any exception raised by `dispatch.render` is printed with its traceback
    and converted into a ProcessResult carrying an "Internal error" message.
    The outcome is then coerced to a ProcessResult, on which
    `truncate_in_place_if_too_big()` and `sanitize_in_place()` are called, in
    that order.
    """
    try:
        rendered = dispatch.render(params, table, fetch_result=fetch_result)
    except Exception as err:
        # Render boundary: report the failure instead of propagating it.
        traceback.print_exc()
        rendered = ProcessResult(error=f'Internal error: {err}')

    processed = ProcessResult.coerce(rendered)
    processed.truncate_in_place_if_too_big()
    processed.sanitize_in_place()
    return processed
コード例 #3
0
def test_render(in_table, patch_json, out_table=None, out_error=''):
    """Render EditCells on `in_table` and assert the expected outcome.

    Args:
        in_table: input table; `sanitize_dataframe` is called on it first.
        patch_json: value passed to `MockParams` as `celledits`.
        out_table: table the render is expected to produce; `None` (the
            default) means a fresh, empty DataFrame.
        out_error: error message the render is expected to produce.
    """
    # Create the default per call: a DataFrame built in the signature would
    # be shared by every call and is sanitized in place further down.
    if out_table is None:
        out_table = pd.DataFrame()

    sanitize_dataframe(in_table)

    result = EditCells.render(MockParams(celledits=patch_json), in_table)
    result = ProcessResult.coerce(result)
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
コード例 #4
0
def test_render(in_table, patch_json, out_table=None,
                out_error=''):
    """Render EditCells through a MockWfModule and assert the outcome.

    Args:
        in_table: input table; `sanitize_dataframe` is called on it first.
        patch_json: value used to construct the `MockWfModule`.
        out_table: table the render is expected to produce; `None` (the
            default) means a fresh, empty DataFrame.
        out_error: error message the render is expected to produce.
    """
    # Create the default per call: a DataFrame built in the signature would
    # be shared by every call and is sanitized in place further down.
    if out_table is None:
        out_table = pd.DataFrame()

    wfm = MockWfModule(patch_json)
    sanitize_dataframe(in_table)

    result = ProcessResult.coerce(EditCells.render(wfm, in_table))
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
コード例 #5
0
ファイル: test_refine.py プロジェクト: lijielife/cjworkbench
    def _test_render(self, in_table: pd.DataFrame, column: str,
                     edits_json: str,
                     expected_out: pd.DataFrame = None,
                     expected_error: str = '') -> None:
        """Test that the render method works (kinda an integration test).

        Args:
            in_table: input table passed to `Refine.render`.
            column: first argument used to construct the `MockWfModule`.
            edits_json: second argument used to construct the `MockWfModule`.
            expected_out: table the render is expected to produce; `None`
                (the default) means a fresh, empty DataFrame.
            expected_error: error message the render is expected to produce.
        """
        # Create the default per call: a DataFrame built in the signature
        # would be shared by every call and is sanitized in place below.
        if expected_out is None:
            expected_out = pd.DataFrame()

        wf_module = MockWfModule(column, edits_json)
        result = Refine.render(wf_module, in_table)
        result.sanitize_in_place()

        expected = ProcessResult(expected_out, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
コード例 #6
0
ファイル: test_refine.py プロジェクト: lijielife/cjworkbench
    def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                                spec: RefineSpec,
                                expected_out: pd.DataFrame = None,
                                expected_error: str = '') -> None:
        """Render and assert the output is as expected.

        Args:
            in_table: input table passed to `spec.apply`.
            column: column name passed to `spec.apply`.
            spec: the RefineSpec under test.
            expected_out: table the apply is expected to produce; `None`
                (the default) means a fresh, empty DataFrame.
            expected_error: error message the apply is expected to produce.
        """
        # Create the default per call: a DataFrame built in the signature
        # would be shared by every call and is sanitized in place below.
        if expected_out is None:
            expected_out = pd.DataFrame()

        result = ProcessResult.coerce(spec.apply(in_table, column))
        # Sanitize result+expected, so if sanitize changes these tests may
        # break (which is what we want).
        result.sanitize_in_place()

        expected = ProcessResult(expected_out, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
コード例 #7
0
    def test_load_json(self):
        """Check that running the event commits the table parsed from JSON.

        `requests.get` is patched to answer with the contents of the
        `sfpd.json` fixture; the test then asserts `commit_result` was called
        with the sanitized table built from that same JSON.
        """
        self.wf_module.url = 'http://test.com/the.json'

        # use a complex example with nested data
        fname = os.path.join(settings.BASE_DIR,
                             'server/tests/test_data/sfpd.json')
        # Read through a context manager so the file handle is closed
        # instead of being left for the garbage collector.
        with open(fname) as f:
            sfpd_json = f.read()
        # OrderedDict otherwise cols get sorted
        sfpd_table = pd.DataFrame(json.loads(sfpd_json,
                                             object_pairs_hook=OrderedDict))

        expected = ProcessResult(sfpd_table)
        expected.sanitize_in_place()

        with patch('requests.get', respond(sfpd_json, 'application/json')):
            run_event(self.wf_module)

        self.commit_result.assert_called_with(self.wf_module, expected)
コード例 #8
0
    def test_render_truncate_and_sanitize(self):
        """Check that render calls truncate, then sanitize, on the result.

        The two methods on the returned ProcessResult are replaced with
        recorders so the order of calls can be asserted.
        """
        calls = []

        def recorder(label):
            # Build a zero-argument callable that logs `label` when invoked.
            return lambda: calls.append(label)

        retval = ProcessResult(pd.DataFrame({'A': [1]}))
        retval.truncate_in_place_if_too_big = recorder('truncate')
        retval.sanitize_in_place = recorder('sanitize')

        render_impl = lambda _a, _b: retval  # noqa: E731
        lm = LoadedModule('int', '1', False, render_impl=render_impl)
        with self.assertLogs():
            lm.render(MockParams(), pd.DataFrame(), fetch_result=None)

        self.assertEqual(calls, ['truncate', 'sanitize'])
コード例 #9
0
    def test_load_json(self):
        """Check that fetch() on a JSON URL equals the sanitized fixture table.

        `spooled_data_from_url` is patched to serve the raw bytes of the
        `sfpd.json` fixture as 'application/json' / 'utf-8'.
        """
        fixture_path = os.path.join(settings.BASE_DIR,
                                    'server/tests/test_data/sfpd.json')
        with open(fixture_path, 'rb') as f:
            sfpd_json = f.read()

        # TODO nix this big file and use a sensible unit test. This extra
        # computation merely tests that the code uses the same JSON-parsing
        # logic as the test.
        # OrderedDict otherwise cols get sorted
        parsed = json.loads(sfpd_json, object_pairs_hook=OrderedDict)
        expected = ProcessResult(pd.DataFrame(parsed))
        expected.sanitize_in_place()

        fake_spool = fake_spooled_data_from_url(sfpd_json, 'application/json',
                                                'utf-8')
        with patch('server.modules.utils.spooled_data_from_url', fake_spool):
            fetch_result = fetch(url='http://test.com/the.json')

        self.assertEqual(fetch_result, expected)
コード例 #10
0
ファイル: test_loadurl.py プロジェクト: lijielife/cjworkbench
    def test_load_json(self):
        """Check that fetching a JSON URL renders the sanitized fixture table.

        `requests.get` is patched to return a mock text response with the
        contents of the `sfpd.json` fixture; the fetch button is pressed and
        the executed module's result is compared with the expected table.
        """
        url = 'http://test.com/the.json'
        self.url_pval.set_value(url)
        self.url_pval.save()

        # use a complex example with nested data
        fname = os.path.join(settings.BASE_DIR,
                             'server/tests/test_data/sfpd.json')
        # Read through a context manager so the file handle is closed
        # instead of being left for the garbage collector.
        with open(fname) as f:
            sfpd_json = f.read()
        # OrderedDict otherwise cols get sorted
        sfpd_table = pd.DataFrame(
            json.loads(sfpd_json, object_pairs_hook=OrderedDict))
        expected = ProcessResult(sfpd_table)
        expected.sanitize_in_place()

        with patch('requests.get') as get:
            get.return_value = mock_text_response(sfpd_json,
                                                  'application/json')
            self.press_fetch_button()
            result = execute_wfmodule(self.wfmodule)
            self.assertEqual(result, expected)
コード例 #11
0
def table_to_result(table):
    """Wrap `table` in a ProcessResult, sanitize it in place, and return it."""
    wrapped = ProcessResult(table)
    # Sanitizing alters the dataframe.equals() result, so do it before the
    # value is handed to anything that compares it.
    wrapped.sanitize_in_place()
    return wrapped
コード例 #12
0
class URLScraperTests(SimpleTestCase):
    """Tests for URLScraper fetch + render with `scrape_urls` mocked out."""

    def setUp(self):
        super().setUp()

        self.urls = list(simple_result_table['url'])

        expected = ProcessResult(url_table)
        expected.sanitize_in_place()
        self.expected_url_table_result = expected

    # Simple test that .event() calls scrape_urls() in the right way
    # We don't test all the scrape error cases (invalid urls etc.) as they are
    # tested above
    def test_scrape_column(self):
        """Scrape URLs taken from the 'url' column of the previous module."""
        wf_module = MockWfModule('Input column', 'url')
        scraped_table = simple_result_table.copy()

        # modifies the table in place to add results, just like the real thing
        async def mock_scrapeurls(urls, table):
            for name in ('status', 'html'):
                table[name] = scraped_table[name]

        with patch('django.utils.timezone.now', return_value=testnow):
            # call the mock function instead, the real fn is tested above
            with patch('server.modules.urlscraper.scrape_urls',
                       side_effect=mock_scrapeurls):
                wf_module.previous = MockWfModule('', '')
                upstream_table = pd.DataFrame({
                    'url': self.urls,
                })
                wf_module.previous.cached_render_result = \
                    MockCachedRenderResult(upstream_table)

                fetch(wf_module)
                rendered = ProcessResult.coerce(
                    URLScraper.render(wf_module, pd.DataFrame())
                )
                self.assertEqual(rendered, ProcessResult(scraped_table))

    # Tests scraping from a list of URLs
    def test_scrape_list(self):
        """Scrape URLs supplied as a newline-separated list parameter."""
        url_lines = [
            'http://a.com/file',
            'https://b.com/file2',
            'c.com/file/dir'  # Removed 'http://' to test the URL-fixing part
        ]
        wf_module = MockWfModule('List', '', '\n'.join(url_lines))
        scraped_table = simple_result_table.copy()

        # Code below mostly lifted from the column test
        async def mock_scrapeurls(urls, table):
            for name in ('status', 'html'):
                table[name] = scraped_table[name]

        with patch('django.utils.timezone.now', return_value=testnow):
            # call the mock function instead, the real fn is tested above
            with patch('server.modules.urlscraper.scrape_urls',
                       side_effect=mock_scrapeurls):
                fetch(wf_module)
                rendered = ProcessResult.coerce(
                    URLScraper.render(wf_module, pd.DataFrame())
                )
                self.assertEqual(rendered, ProcessResult(scraped_table))
コード例 #13
0
class URLScraperTests(LoggedInTestCase):
    """Workflow-level tests for the 'urlscraper' module, scraping mocked."""

    def setUp(self):
        super().setUp()  # log in

        self.scraped_table = simple_result_table
        self.urls = list(self.scraped_table['url'])

        # create a workflow that feeds our urls via PasteCSV into a URLScraper
        self.url_table = pd.DataFrame(self.urls, columns=['url'])
        expected = ProcessResult(self.url_table)
        self.expected_url_table_result = expected
        expected.sanitize_in_place()

        url_csv = 'url\n' + '\n'.join(self.urls)
        self.wfmodule = load_and_add_module(
            'urlscraper',
            workflow=create_testdata_workflow(url_csv),
        )

    def press_fetch_button(self):
        """POST the 'version_select' event, then reload the module."""
        version_id = get_param_by_id_name('version_select').id
        self.client.post(f'/api/parameters/{version_id}/event')
        self.wfmodule.refresh_from_db()  # new last_relevant_workflow_id

    def _fetch_with_mocked_scraper_and_check(self):
        """Press fetch with `scrape_urls` mocked; assert the scraped table."""
        # modifies the table in place to add results, just like the real thing
        async def mock_scrapeurls(urls, table):
            for name in ('status', 'html'):
                table[name] = self.scraped_table[name]

        with mock.patch('django.utils.timezone.now', return_value=testnow):
            # call the mock function instead, the real fn is tested above
            with mock.patch('server.modules.urlscraper.scrape_urls',
                            side_effect=mock_scrapeurls):
                self.press_fetch_button()
                result = execute_wfmodule(self.wfmodule)
                self.assertEqual(result, ProcessResult(self.scraped_table))

    def test_initial_nop(self):
        """Without any fetch, executing the module equals the URL table."""
        self.assertEqual(execute_wfmodule(self.wfmodule),
                         self.expected_url_table_result)

    def test_nop_with_initial_col_selection(self):
        # When a column is first selected and no scraping is performed, the
        # initial table should be returned
        source_options = "List of URLs|Load from column".split('|')
        source_param = get_param_by_id_name('urlsource')
        source_param.value = source_options.index('Load from column')
        source_param.save()

        column_param = get_param_by_id_name('urlcol')
        column_param.value = 'url'
        column_param.save()

        self.assertEqual(execute_wfmodule(self.wfmodule),
                         self.expected_url_table_result)

    # Simple test that .event() calls scrape_urls() in the right way
    # We don't test all the scrape error cases (invalid urls etc.) as they are
    # tested above
    def test_scrape_column(self):
        """Scrape using the 'Input column' source with column 'url'."""
        source_options = "List|Input column".split('|')
        source_param = get_param_by_id_name('urlsource')
        source_param.value = source_options.index('Input column')
        source_param.save()

        get_param_by_id_name('urlcol').set_value('url')

        self._fetch_with_mocked_scraper_and_check()

    # Tests scraping from a list of URLs
    def test_scrape_list(self):
        """Scrape using the 'List' source with a newline-separated list."""
        source_options = "List|Input column".split('|')
        source_param = get_param_by_id_name('urlsource')
        source_param.value = source_options.index('List')
        source_param.save()

        url_lines = [
            'http://a.com/file',
            'https://b.com/file2',
            'c.com/file/dir'  # Removed 'http://' to test the URL-fixing part
        ]
        get_param_by_id_name('urllist').set_value('\n'.join(url_lines))

        # Remaining steps are shared with the column test
        self._fetch_with_mocked_scraper_and_check()
コード例 #14
0
 def test_sanitize(self):
     """Check sanitize_in_place() against the stringified expectation.

     A 'foo' column holding the lists [1] and [2] is expected to compare
     equal, after sanitizing, to one holding the strings '[1]' and '[2]'.
     """
     actual = ProcessResult(DataFrame({'foo': [[1], [2]]}))
     actual.sanitize_in_place()

     wanted = ProcessResult(DataFrame({'foo': ['[1]', '[2]']}))
     self.assertEqual(actual, wanted)