Example #1
0
 def setUp(self):
     """Prepare a 'concaturl' module and a data-bearing 'uploadfile' module.

     The 'uploadfile' module gets a cached render result (delta_id=1) built
     from its own fetched table, and is then saved.
     """
     super(WorkflowImport, self).setUp()  # log in
     self.wfm = load_and_add_module('concaturl')

     # Second module, loaded with data
     external = load_and_add_module('uploadfile')
     self.ext_wfm = external
     fetched_result = ProcessResult(external.retrieve_fetched_table())
     external.cache_render_result(delta_id=1, result=fetched_result)
     external.save()
Example #2
0
 def test_execute_revision_0(self):
     """Executing a new workflow (rev=0, no caches) must not crash."""
     workflow = create_testdata_workflow(table_csv)
     select_module = load_and_add_module('selectcolumns', workflow=workflow)

     actual = execute_wfmodule(select_module)
     expected = ProcessResult(table_dataframe)
     self.assertEqual(actual, expected)

     # Both modules are expected to report a cached result at revision 0
     cached_revisions = cached_render_result_revision_list(workflow)
     self.assertEqual(cached_revisions, [0, 0])
    def setUp(self):
        """Create the 'joinurl' module, its parameters and fixture tables.

        Two pairs of input tables are built (string-valued and numeric),
        each with the reference frame the tests compare a left join against.
        The string-valued external table is stored as the module's fetched
        data.
        """
        super(JoinURLTests, self).setUp()  # log in
        self.wfm = load_and_add_module('joinurl')

        # Parameter values
        self.url_pval = get_param_by_id_name('url')
        self.colnames_pval = get_param_by_id_name('colnames')
        self.importcols_pval = get_param_by_id_name('importcols')
        self.type_pval = get_param_by_id_name('type')
        self.select_columns_pval = get_param_by_id_name('select_columns')

        self.valid_workflow_URL = 'https://app.workbenchdata.com/workflows/2/'

        # Column layouts shared by the fixtures below
        left_columns = ['col1', 'key']
        right_columns = ['key', 'col2', 'col3']
        joined_columns = ['col1', 'key', 'col2', 'col3']

        # String-valued fixtures
        self.table = pd.DataFrame(
            [['a', 'b'], ['a', 'c']],
            columns=left_columns)
        self.ext_workflow = pd.DataFrame(
            [['b', 'c', 'd'], ['d', 'a', 'b']],
            columns=right_columns)
        self.ref_left_join = pd.DataFrame(
            [['a', 'b', 'c', 'd']],
            columns=joined_columns)

        # Numeric fixtures
        self.table_with_types = pd.DataFrame(
            [[1, 2], [1, 3]],
            columns=left_columns)
        self.ext_workflow_with_types = pd.DataFrame(
            [[2.0, 3.0, 4.0], [4.0, 1.0, 2.0]],
            columns=right_columns)
        self.ref_left_join_with_types = pd.DataFrame(
            [[1, 2.0, 3.0, 4.0]],
            columns=joined_columns)

        # Store the external table and select it as the fetched version
        stored_version = self.wfm.store_fetched_table(self.ext_workflow)
        self.wfm.set_fetched_data_version(stored_version)
Example #4
0
    def setUp(self):
        """Create a 'loadurl' module and look up the parameters tests edit."""
        super().setUp()  # log in
        self.wfmodule = load_and_add_module('loadurl')

        # Keep handles on our parameter values so tests can tweak them later
        url_lookup = {'parameter_spec__id_name': 'url'}
        fetch_lookup = {'parameter_spec__id_name': 'version_select'}
        self.url_pval = ParameterVal.objects.get(**url_lookup)
        self.fetch_pval = ParameterVal.objects.get(**fetch_lookup)
Example #5
0
 def test_render_static_truncates_table(self):
     """Rendering a 3-row table must yield 2 rows plus a truncation error.

     NOTE(review): the 2-row limit is presumably configured outside this
     method (e.g. by a settings override) -- confirm.
     """
     three_rows = pd.DataFrame({'a': [1, 2, 3]})
     wfm = load_and_add_module('editcells')  # it never changes row count

     actual = module_dispatch_render(wfm, three_rows)
     expected = ProcessResult(
         dataframe=pd.DataFrame({'a': [1, 2]}),
         error='Truncated output from 3 rows to 2',
     )
     self.assertEqual(actual, expected)

     wfm.refresh_from_db()
Example #6
0
    def test_execute_revision_0(self):
        """Executing a new workflow (rev=0, no caches) must not crash."""
        workflow = create_testdata_workflow(table_csv)
        select_module = load_and_add_module('selectcolumns',
                                            workflow=workflow)

        run_workflow = async_to_sync(execute_workflow)
        run_workflow(workflow)

        # The render result is read back from the module's cache
        select_module.refresh_from_db()
        cached = select_module.get_cached_render_result()
        result = cached.result

        self.assertEqual(result, ProcessResult(table_dataframe))
        cached_revisions = cached_render_result_revision_list(workflow)
        self.assertEqual(cached_revisions, [0, 0])
 def setUp(self):
     """Create a test-data workflow with a 'duplicatecolumn' module."""
     super(DuplicateColumnFromTableTests, self).setUp()

     # Reference table for correctness checking. Dates are not converted
     # here: that would not help the tests, because the default test
     # workflow does not parse dates.
     self.table = reference_table.copy()

     workflow = create_testdata_workflow(csv_text=test_csv)
     self.workflow = workflow
     self.wf_module = load_and_add_module('duplicatecolumn',
                                          workflow=workflow)
     self.colnames_pval = get_param_by_id_name('colnames')
Example #8
0
    def test_execute_cache_hit(self):
        """A second execute must reuse the cache instead of re-rendering."""
        workflow = create_testdata_workflow(table_csv)
        select_module = load_and_add_module('selectcolumns',
                                            workflow=workflow)

        # First execution -- which should cache the result
        first_result = execute_wfmodule(select_module)

        render_target = 'server.dispatch.module_dispatch_render'
        with mock.patch(render_target) as mdr:
            second_result = execute_wfmodule(select_module)
            # The patched renderer must not have been invoked
            self.assertFalse(mdr.called)
            self.assertEqual(second_result, first_result)
Example #9
0
    def test_error_render(self):
        """Invalid code must come back as an error result with empty output."""
        wfm = load_and_add_module('pythoncode', workflow=self.workflow)

        # Force an error by supplying something that is not Python
        get_param_by_id_name('code').set_value('not python code')

        actual = module_dispatch_render(wfm, self.test_table)
        expected = ProcessResult(
            error='Line 1: invalid syntax (user input, line 1)',
            json={'output': ''},  # not part of this test
        )
        self.assertEqual(actual, expected)
Example #10
0
    def setUp(self):
        """Create a 'scrapetable' module and look up its parameter values.

        The 'url' and 'version_select' values are fetched with a single
        related-field lookup each (``parameter_spec__id_name``) rather than
        a separate ``ParameterSpec`` query followed by a ``ParameterVal``
        query.
        """
        super(ScrapeTableTest, self).setUp()  # log in
        self.wfmodule = load_and_add_module('scrapetable')

        # save references to our parameter values so we can tweak them later
        self.url_pval = ParameterVal.objects.get(
            parameter_spec__id_name='url')
        self.fetch_pval = ParameterVal.objects.get(
            parameter_spec__id_name='version_select')
        self.table_number_pval = get_param_by_id_name('tablenum',
                                                      self.wfmodule)
        self.first_row_pval = get_param_by_id_name('first_row_is_header',
                                                   self.wfmodule)
Example #11
0
    def test_execute_cache_hit(self):
        """Re-executing an unchanged workflow must not re-render modules."""
        workflow = create_testdata_workflow(table_csv)
        select_module = load_and_add_module('selectcolumns',
                                            workflow=workflow)

        # First run: populates the cached render result
        async_to_sync(execute_workflow)(workflow)
        select_module.refresh_from_db()
        first_result = select_module.get_cached_render_result().result

        render_target = 'server.dispatch.module_dispatch_render'
        with patch(render_target) as mdr:
            # Second run: the patched renderer must stay untouched
            async_to_sync(execute_workflow)(workflow)
            select_module.refresh_from_db()
            second_result = select_module.get_cached_render_result().result
            self.assertFalse(mdr.called)
            self.assertEqual(second_result, first_result)
Example #12
0
    def test_auth(self):
        """Importing from a workflow owned by another user must be denied."""
        # Create a second user who owns a new workflow, then try to reach
        # the workflow of self.ext_wfm (owned by the default user) from it.
        other_user = create_test_user(username='******',
                                      email='*****@*****.**')
        other_workflow = add_new_workflow('New Workflow', owner=other_user)
        wfm = load_and_add_module('concaturl', workflow=other_workflow)

        target_url = (
            'https://app.workbenchdata.com/workflows/'
            f'{self.ext_wfm.workflow_id}/'
        )
        result = store_external_workflow(wfm, target_url)

        denied = ProcessResult(error='Access denied to the target workflow')
        self.assertEqual(result, denied)
Example #13
0
    def setUp(self):
        """Build the CSV fixture tables and a 'selectcolumns' workflow.

        The module's 'colnames' parameter is set to 'M,F', matching the
        ``test_table_MF`` fixture.
        """
        super(DispatchTests, self).setUp()  # log in

        csv_lines = [
            'Class,M,F',
            'math,10,12',
            'english,,7',
            'history,11,13',
            'economics,20,20',
        ]
        self.test_csv = '\n'.join(csv_lines)
        self.test_table = pd.read_csv(io.StringIO(self.test_csv))
        self.test_table_MF = self.test_table[['M', 'F']]

        self.workflow = add_new_workflow('dispatch tests wf')
        self.wfm = load_and_add_module('selectcolumns', workflow=self.workflow)
        colnames_pval = get_param_by_id_name('colnames')
        colnames_pval.set_value('M,F')
Example #14
0
    def setUp(self):
        """Create a workflow whose CSV data feeds URLs to a 'urlscraper'."""
        super().setUp()  # log in

        self.scraped_table = simple_result_table
        self.urls = list(self.scraped_table['url'])

        # Expected single-column table of the URLs, sanitized in place
        self.url_table = pd.DataFrame(self.urls, columns=['url'])
        expected = ProcessResult(self.url_table)
        self.expected_url_table_result = expected
        expected.sanitize_in_place()

        # Feed our urls to the scraper through the test-data workflow
        url_csv = 'url\n{}'.format('\n'.join(self.urls))
        workflow = create_testdata_workflow(url_csv)
        self.wfmodule = load_and_add_module('urlscraper', workflow=workflow)
    def test_parameterval_detail_patch_fetch(self):
        """PATCHing a parameter with ``pressed_enter`` must fire the event.

        Sends an authenticated PATCH for the 'url' parameter and checks that
        the view answers 204 and that ``LoadURL.event`` was called exactly
        once.
        """
        load_and_add_module('loadurl')  # creates new workflow too
        url_param = get_param_by_id_name('url')

        with mock.patch('server.modules.loadurl.LoadURL.event') as event_call:

            request = self.factory.patch('/api/parameters/%d/' % url_param.id,
                                         {
                                             'value': '50.456',
                                             'pressed_enter': True
                                         })
            force_authenticate(request, user=self.user)
            response = parameterval_detail(request, pk=url_param.id)
            # Compare by value: assertIs on ints tests object identity,
            # which only happens to work for small cached integers.
            self.assertEqual(response.status_code,
                             status.HTTP_204_NO_CONTENT)

            # should have made a call to the LoadURL event handler
            self.assertEqual(event_call.call_count, 1)
    def setUp(self):
        """Stub OAuth service lookup and configure a 'googlesheets' module.

        NOTE(review): the patch started here is presumably stopped in a
        tearDown outside this view -- confirm.
        """
        super().setUp()

        # Set up auth: OAuthService.lookup_or_none is patched to return a
        # mock service whose requests object answers GET with a 404.
        requests_mock = Mock()
        requests_mock.get = Mock(
            return_value=MockResponse(404, 'Test not written'))
        self.requests = requests_mock

        service_mock = Mock()
        service_mock.requests_or_str_error = Mock(return_value=requests_mock)
        self.oauth_service = service_mock

        self.oauth_service_lookup_patch = patch.object(
            oauth.OAuthService, 'lookup_or_none', return_value=service_mock)
        self.oauth_service_lookup_patch.start()

        # Create WfModule
        self.wf_module = load_and_add_module('googlesheets')

        credentials = {
            'name': 'file',
            'secret': {
                'refresh_token': 'a-refresh-token'
            },
        }
        self.credentials_param = get_param_by_id_name('google_credentials')
        self.credentials_param.value = json.dumps(credentials)
        self.credentials_param.save()

        selected_file = {
            "id": "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj",
            "name": "Police Data",
            "url": "http://example.org/police-data",
            "mimeType": "application/vnd.google-apps.spreadsheet",
        }
        self.file_param = get_param_by_id_name('googlefileselect')
        self.file_param.value = json.dumps(selected_file)
        self.file_param.save()

        # our test data
        csv_stream = io.BytesIO(example_csv)
        self.test_table = pd.read_csv(csv_stream, encoding='utf-8')
        sanitize_dataframe(self.test_table)
Example #17
0
    def test_execute_new_revision(self):
        """After a parameter change, re-executing renders the new revision."""
        workflow = create_testdata_workflow(table_csv)
        select_module = load_and_add_module('selectcolumns',
                                            workflow=workflow)

        async_to_sync(execute_workflow)(workflow)

        # Select only column 'A' and mark the module as changed at delta 2
        colnames = get_param_by_id_name('colnames', wf_module=select_module)
        colnames.set_value('A')
        select_module.last_relevant_delta_id = 2
        select_module.save(update_fields=['last_relevant_delta_id'])

        async_to_sync(execute_workflow)(workflow)

        select_module.refresh_from_db()
        cached = select_module.get_cached_render_result()
        result = cached.result

        expected = ProcessResult(table_dataframe[['A']])
        self.assertEqual(result, expected)
        cached_revisions = cached_render_result_revision_list(workflow)
        self.assertEqual(cached_revisions, [0, 2])
Example #18
0
    def test_execute_new_revision(self):
        """Executing after a parameter command caches the new revisions."""
        workflow = create_testdata_workflow(table_csv)
        select_module = load_and_add_module('selectcolumns',
                                            workflow=workflow)

        # Add command, modifying revision
        colnames = get_param_by_id_name('colnames', wf_module=select_module)
        ChangeParameterCommand.create(colnames, 'A')

        # Nothing is expected to be cached before execution
        revisions_before = cached_render_result_revision_list(workflow)
        self.assertEqual(revisions_before, [None, None])

        # Assign delta ids 1 and 2 to the first and second module
        first_module = workflow.wf_modules.first()
        first_module.last_relevant_delta_id = 1
        first_module.save()
        select_module.last_relevant_delta_id = 2
        select_module.save()

        result = execute_wfmodule(select_module)
        expected = ProcessResult(table_dataframe[['A']])
        self.assertEqual(result, expected)
        revisions_after = cached_render_result_revision_list(workflow)
        self.assertEqual(revisions_after, [1, 2])
Example #19
0
    def test_resume_without_rerunning_unneeded_renders(self):
        """Only the module whose delta id advanced is rendered again."""
        workflow = create_testdata_workflow(table_csv)
        first_module = workflow.wf_modules.first()
        select_module = load_and_add_module(
            'selectcolumns',
            workflow=workflow,
            last_relevant_delta_id=1,
        )
        first_module.last_relevant_delta_id = 1
        first_module.save()

        expected = execute_wfmodule(select_module)

        # Advance only the second module's delta id
        select_module.refresh_from_db()
        select_module.last_relevant_delta_id = 2
        select_module.save()

        render_target = 'server.dispatch.module_dispatch_render'
        with mock.patch(render_target) as mdr:
            mdr.return_value = expected
            result = execute_wfmodule(select_module)
            # Exactly one render call is expected
            mdr.assert_called_once()
            self.assertEqual(result, expected)
    def setUp(self):
        """Create the 'concaturl' module, its parameters and fixture tables.

        Builds the two input tables plus one reference frame per fixture
        name (source-only, inner, outer). Missing cells use ``np.nan``: the
        ``np.NaN`` alias was removed in NumPy 2.0, while ``np.nan`` works on
        every NumPy version.
        """
        super(ConcatURLTests, self).setUp()  # log in
        self.wfm = load_and_add_module('concaturl')
        self.url_pval = get_param_by_id_name('url')
        self.source_columns_pval = get_param_by_id_name('source_columns')
        self.type_pval = get_param_by_id_name('type')
        self.csv_table = pd.read_csv(mock_csv_path)

        self.valid_workflow_URL = 'https://app.workbenchdata.com/workflows/2/'

        self.table = pd.DataFrame([['a', 'b'], ['a', 'c']],
                                  columns=['col1', 'key'])
        self.ext_workflow = pd.DataFrame([['b', 'c'], ['d', 'a']],
                                         columns=['key', 'col2'])

        self.ref_source_only_concat = pd.DataFrame(
            [['a', 'b'], ['a', 'c'], [np.nan, 'b'], [np.nan, 'd']],
            columns=['col1', 'key'])

        self.ref_inner_concat = pd.DataFrame(['b', 'c', 'b', 'd'],
                                             columns=['key'])

        self.ref_outer_concat = pd.DataFrame(
            [['a', 'b', np.nan], ['a', 'c', np.nan],
             [np.nan, 'b', 'c'], [np.nan, 'd', 'a']],
            columns=['col1', 'key', 'col2'])
    def setUp(self):
        """Patch OAuth service settings and configure a 'googlesheets' module.

        NOTE(review): the settings patch started here is presumably stopped
        in a tearDown outside this view -- confirm.
        """
        super().setUp()

        # Set up auth: register a 'google_credentials' service entry
        google_service = {
            'token_url': 'http://token-url',
            'refresh_url': 'http://refresh-url',
            'client_id': 'client-id',
            'client_secret': 'client-secret',
            'redirect_url': 'http://my-redirect-server',
        }
        self.service_patch = patch.dict(
            settings.PARAMETER_OAUTH_SERVICES,
            {'google_credentials': google_service})
        self.service_patch.start()

        # Create WfModule
        self.wf_module = load_and_add_module('googlesheets')

        credentials = {
            'name': 'file',
            'secret': {
                'refresh_token': 'a-refresh-token'
            },
        }
        self.credentials_param = get_param_by_id_name('google_credentials')
        self.credentials_param.value = json.dumps(credentials)
        self.credentials_param.save()

        selected_file = {
            "id": "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj",
            "name": "Police Data",
            "url": "http://example.org/police-data",
        }
        self.file_param = get_param_by_id_name('googlefileselect')
        self.file_param.value = json.dumps(selected_file)
        self.file_param.save()

        # our test data
        self.test_table = pd.read_csv(gdrive_file)
        sanitize_dataframe(self.test_table)
Example #22
0
    def test_execute_mark_unreachable(self, send_delta_async):
        """A module downstream of an erroring module becomes 'unreachable'.

        Runs a three-module workflow once (all 'ok'), makes the second module
        fail, re-runs, and checks that the second module reports 'error' and
        the third 'unreachable'.

        Args:
            send_delta_async: mock passed in by the caller (presumably a
                ``patch`` decorator outside this view -- confirm). Its
                return value is set to ``fake_future``.
        """
        send_delta_async.return_value = fake_future

        workflow = create_testdata_workflow(table_csv)
        # Default pythoncode value is passthru
        wf_module2 = load_and_add_module('pythoncode', workflow=workflow)
        wf_module3 = load_and_add_module('selectcolumns', workflow=workflow,
                                         param_values={'drop_or_keep': 1,
                                                       'colnames': 'A,B'})

        async_to_sync(execute_workflow)(workflow)

        # Should set status of all modules to 'ok'
        wf_module3.refresh_from_db()
        self.assertEqual(wf_module3.status, 'ok')

        # Update parameter. Now module 2 will return an error.
        wf_module2.parameter_vals.get(parameter_spec__id_name='code') \
            .set_value('=ERROR')
        wf_module2.last_relevant_delta_id = 2
        wf_module3.last_relevant_delta_id = 2
        wf_module2.save(update_fields=['last_relevant_delta_id'])
        wf_module3.save(update_fields=['last_relevant_delta_id'])

        # (more integration-test-y) now their statuses are 'busy' because they
        # await render (and not because they're fetching)
        wf_module2.refresh_from_db()
        self.assertEqual(wf_module2.status, 'busy')
        self.assertEqual(wf_module2.is_busy, False)  # is_busy is for fetch
        wf_module3.refresh_from_db()
        self.assertEqual(wf_module3.status, 'busy')
        # Check module 3 here; this line previously re-checked module 2.
        self.assertEqual(wf_module3.is_busy, False)  # is_busy is for fetch

        async_to_sync(execute_workflow)(workflow)

        # Now we expect module 2 to have 'error', 3 to have 'unreachable'
        wf_module2.refresh_from_db()
        self.assertEqual(wf_module2.status, 'error')
        wf_module3.refresh_from_db()
        self.assertEqual(wf_module3.status, 'unreachable')

        # NOTE(review): assert_called_with() only inspects the most recent
        # call, which is presumably why the module-2 check below is disabled.
        # send_delta_async.assert_called_with(workflow.id, {
        #     'updateWfModules': {
        #         str(wf_module2.id): {
        #             'error_msg': 'ERROR',
        #             'status': 'error',
        #             'quick_fixes': [],
        #             'output_columns': [],
        #             'last_relevant_delta_id':
        #                 wf_module2.last_relevant_delta_id
        #         }
        #     }
        # })

        send_delta_async.assert_called_with(workflow.id, {
            'updateWfModules': {
                str(wf_module3.id): {
                    'error_msg': '',
                    'status': 'unreachable',
                    'quick_fixes': [],
                    'output_columns': [],
                    'output_n_rows': 0,
                    'last_relevant_delta_id':
                    wf_module3.last_relevant_delta_id,
                    'cached_render_result_delta_id':
                    wf_module2.last_relevant_delta_id,
                }
            }
        })
Example #23
0
 def setUp(self):
     """Create an 'uploadfile' module and an API request factory."""
     super(UploadFileViewTests, self).setUp()  # log in
     upload_module = load_and_add_module('uploadfile')
     request_factory = APIRequestFactory()
     self.wfm = upload_module
     self.factory = request_factory
Example #24
0
    def setUp(self):
        """Create an 'uploadfile' module and load the mock CSV as a table."""
        super(UploadFileTests, self).setUp()  # log in
        upload_module = load_and_add_module('uploadfile')
        self.wfm = upload_module

        # Table read from the mock CSV file
        mock_table = pd.read_csv(mock_csv_path)
        self.csv_table = mock_table
Example #25
0
 def setUp(self):
     """Load a 'loadurl' module and keep it on ``self.wfm``."""
     loadurl_module = load_and_add_module('loadurl')
     self.wfm = loadurl_module
Example #26
0
 def setUp(self):
     """Create a test-data workflow with a 'rename' module on it."""
     super(RenameFromTableTests, self).setUp()
     workflow = create_testdata_workflow(csv_text=test_csv)
     self.workflow = workflow
     self.wf_module = load_and_add_module('rename', workflow=workflow)
     # Parameter value holding the rename entries
     self.entries_pval = get_param_by_id_name('rename-entries')
Example #27
0
 def setUp(self):
     """Create a test-data workflow with a 'reorder' module on it."""
     super(ReorderFromTableTests, self).setUp()
     workflow = create_testdata_workflow(csv_text=test_csv)
     self.workflow = workflow
     self.wf_module = load_and_add_module('reorder', workflow=workflow)
     # Parameter value holding the reorder history
     self.history_pval = get_param_by_id_name('reorder-history')
Example #28
0
    def setUp(self):
        """Create a 'pastecsv' module and look up its 'csv' parameter."""
        super(PasteCSVTests, self).setUp()  # log in

        paste_module = load_and_add_module('pastecsv')  # creates workflow
        self.wf_module = paste_module
        self.csv_pval = get_param_by_id_name('csv')